2025-07-13 15:42:38 -04:00
|
|
|
"""
|
|
|
|
|
ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわをんーゎゐゑゕゖゔゝゞ・「」。、
|
|
|
|
|
ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヴヽヾ・「」。、
|
|
|
|
|
"""
|
2025-07-26 17:07:57 -04:00
|
|
|
# vi: filetype=python
|
2025-07-13 15:42:38 -04:00
|
|
|
|
|
|
|
|
import fugashi
|
|
|
|
|
import sys
|
2025-07-26 17:07:57 -04:00
|
|
|
import re
|
2025-07-13 15:42:38 -04:00
|
|
|
|
|
|
|
|
# The script takes exactly one positional argument: the text to annotate.
if len(sys.argv) != 2:
    print("usage: furigana <word>")
    # Use sys.exit() rather than the bare exit(): the latter is a helper
    # injected by the `site` module for interactive sessions and is not
    # available when Python runs with -S or in some frozen/embedded builds.
    sys.exit(1)
|
|
|
|
|
|
2025-07-26 17:07:57 -04:00
|
|
|
# The module docstring doubles as the kana table: after stripping, its first
# line is the hiragana row (H) and its second line the katakana row (K), with
# corresponding characters at the same position in both rows.
H, K = __doc__.strip().split("\n")

# Code-point mapping for str.translate(): each katakana character (and the
# shared punctuation at the end of the rows) maps to its hiragana counterpart.
trans = dict(zip(map(ord, K), map(ord, H)))
|
2025-07-13 15:42:38 -04:00
|
|
|
|
2025-07-26 17:07:57 -04:00
|
|
|
|
|
|
|
|
def align_kana(s: list[str], k: str) -> str:
    """Distribute the reading *k* over the surface runs *s*.

    A run counts as "kana" when its first character occurs in the module
    docstring (the kana table); such runs are emitted verbatim.  Every other
    run (or group of runs that cannot be split further) is emitted as a
    space, the run text, and the matching slice of the reading in square
    brackets, e.g. `` 手[て]``.

    Args:
        s: The surface form split into runs.  Each run must be a non-empty
            string.
        k: The reading for the whole of *s*.  The caller in this file passes
            it already folded to hiragana via ``trans``.

    Returns:
        The annotated string.  An empty *s* yields ``""``; an empty *k*
        yields the runs joined unchanged.
    """
    if not s:
        return ""
    if not k:
        # No reading left to hand out: emit the remaining surface as-is.
        return "".join(s)

    kana_table = __doc__
    head, tail = s[0], s[-1]

    # Peel kana runs off either end; they consume as many reading characters
    # as they are long and need no annotation.
    if tail[0] in kana_table:
        return align_kana(s[:-1], k[:-len(tail)]) + tail
    if head[0] in kana_table:
        return head + align_kana(s[1:], k[len(head):])

    if len(s) == 1:
        return f" {head}[{k}]"

    # Both ends are non-kana here.  Use the first interior kana run as an
    # anchor and split the reading around it.
    i = next((n for n, run in enumerate(s) if run[0] in kana_table), None)
    if i is not None:
        # Search from index 1: the non-kana runs before the anchor must get
        # at least one reading character.  Matching at index 0 would leave
        # them with an empty reading and shift their kana onto the runs that
        # follow (e.g. reading "ききて" for the runs 聞 / き / 手).
        j = k.find(s[i].translate(trans), 1)
        if j != -1:
            left = align_kana(s[:i], k[:j])
            right = align_kana(s[i + 1:], k[j + len(s[i]):])
            return left + s[i] + right

    # No usable anchor: annotate the whole span with the whole reading.
    return f" {''.join(s)}[{k}]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Tokenise the command-line argument and annotate every token that has a
# kana reading; tokens without one are passed through unchanged.
tagger = fugashi.Tagger()
# Splits a surface form into alternating runs of kana and non-kana characters.
run_pattern = re.compile("[ぁ-ゔァ-ヴー]+|[^ぁ-ゔァ-ヴー]+")

output = []
for node in tagger.parseToNodeList(sys.argv[1]):
    reading = node.feature.kana
    if not reading:
        output.append(node.surface)
        continue
    runs = run_pattern.findall(node.surface)
    # Fold the reading to hiragana before aligning it with the surface runs.
    output.append(align_kana(runs, reading.translate(trans)))

# Annotated groups start with a space; drop the one at the very beginning.
print("".join(output).strip())
|