""" ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわをんーゎゐゑゕゖゔゝゞ・「」。、 ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヴヽヾ・「」。、 """ # vi: filetype=python import fugashi import sys import re if len(sys.argv) != 2: print("usage: furigana ") exit(1) H, K = __doc__.strip().split("\n") trans = {ord(a): ord(b) for a, b in zip(K, H)} def align_kana(s: list[str], k: str): if not s: return "" if not k: return "".join(s) if s[-1][0] in __doc__: return align_kana(s[:-1], k[:-len(s[-1])]) + s[-1] if s[0][0] in __doc__: return s[0] + align_kana(s[1:], k[len(s[0]):]) if len(s) == 1: return f" {s[0]}[{k}]" if any(x[0] in __doc__ for x in s): i = next(i for i, x in enumerate(s) if x[0] in __doc__) j = k.find(s[i].translate(trans)) if j != -1: x = align_kana(s[:i], k[:j]) y = align_kana(s[i+1:], k[j+len(s[i]):]) return x + s[i] + y return f" {''.join(s)}[{k}]" output = [] for x in fugashi.Tagger().parseToNodeList(sys.argv[1]): if x.feature.kana: s = re.findall("[ぁ-ゔァ-ヴー]+|[^ぁ-ゔァ-ヴー]+", x.surface) k = x.feature.kana.translate(trans) output.append(align_kana(s, k)) else: output.append(x.surface) print("".join(output).strip())