diff --git a/user/bin/default.nix b/user/bin/default.nix
index 0774ac4..774eb37 100644
--- a/user/bin/default.nix
+++ b/user/bin/default.nix
@@ -21,7 +21,7 @@ in {
   // lib.optionalAttrs config.u.has.jp {
     anki-update = [libnotify inotify-tools pulseaudio ffmpeg jq pyp
       (writers.writePython3Bin "furigana" {
-        libraries = [python312Packages.fugashi python312Packages.unidic-lite];
+        libraries = [python3Packages.fugashi python3Packages.unidic-lite];
       } (builtins.readFile ./furigana))
     ];
   };
diff --git a/user/bin/furigana b/user/bin/furigana
index 020146c..df1838c 100755
--- a/user/bin/furigana
+++ b/user/bin/furigana
@@ -2,21 +2,63 @@
 ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわをんーゎゐゑゕゖゔゝゞ・「」。、
 ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヴヽヾ・「」。、
 """
+# vi: filetype=python
 
 import fugashi
 import sys
+import re
 
 if len(sys.argv) != 2:
-    print('usage: furigana ')
+    print("usage: furigana ")
     exit(1)
 
-H, K = __doc__.strip().split('\n')
-t = {ord(a): ord(b) for a, b in zip(K, H)}
+H, K = __doc__.strip().split("\n")
+trans = {ord(a): ord(b) for a, b in zip(K, H)}
 
-r = []
+
+def align_kana(s: list[str], k: str):
+    """Attach furigana to the kanji parts of one token.
+
+    s -- the token surface split into alternating kana / non-kana runs
+    k -- the token reading in hiragana
+
+    Kana runs are copied through unchanged and used as anchors to carve
+    the reading into per-kanji pieces; each non-kana run is rendered as
+    " run[reading]".  If no anchor fits, the whole remainder is
+    annotated with the whole remaining reading.
+    """
+    # NB: __doc__ here is the module docstring (the kana table), not
+    # this function's docstring.
+    if not s:
+        return ""
+    if not k:
+        return "".join(s)
+    if s[-1][0] in __doc__:
+        # trailing kana: strip as many reading characters as it has
+        return align_kana(s[:-1], k[:-len(s[-1])]) + s[-1]
+    if s[0][0] in __doc__:
+        return s[0] + align_kana(s[1:], k[len(s[0]):])
+    if len(s) == 1:
+        return f" {s[0]}[{k}]"
+    if any(x[0] in __doc__ for x in s):
+        i = next(i for i, x in enumerate(s) if x[0] in __doc__)
+        # s[i] is neither the first nor the last run here, so the runs
+        # on both sides need at least one reading character: never match
+        # the anchor at the very start or flush against the end of k.
+        j = k.find(s[i].translate(trans), 1, len(k) - 1)
+        if j != -1:
+            x = align_kana(s[:i], k[:j])
+            y = align_kana(s[i+1:], k[j+len(s[i]):])
+            return x + s[i] + y
+    return f" {''.join(s)}[{k}]"
+
+
+output = []
 for x in fugashi.Tagger().parseToNodeList(sys.argv[1]):
-    if all(c in H + K for c in x.surface) or not x.feature.kana:
-        r.append(x.surface)
+    if x.feature.kana:
+        s = re.findall("[ぁ-ゔァ-ヴー]+|[^ぁ-ゔァ-ヴー]+", x.surface)
+        k = x.feature.kana.translate(trans)
+        output.append(align_kana(s, k))
     else:
-        r.append(f" {x.surface}[{x.feature.kana.translate(t)}]")
-print(''.join(r).strip())
+        output.append(x.surface)
+print("".join(output).strip())