From 0bf0620c3579a5fe94c529673db105a83d6c3755 Mon Sep 17 00:00:00 2001
From: toasted-nutbread
Date: Fri, 26 Feb 2021 23:23:16 -0500
Subject: Improve kana segmentation (#1446)

* Improve edge case furigana distribution for mixed hiragana/katakana

* Update/add tests
---
 test/test-japanese.js | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'test')

diff --git a/test/test-japanese.js b/test/test-japanese.js
index 590d3157..1a4fc494 100644
--- a/test/test-japanese.js
+++ b/test/test-japanese.js
@@ -402,7 +402,8 @@ function testDistributeFurigana() {
         [
             ['スズメの涙', 'すずめのなみだ'],
             [
-                {text: 'スズメの', furigana: 'すずめの'},
+                {text: 'スズメ', furigana: 'すずめ'},
+                {text: 'の', furigana: ''},
                 {text: '涙', furigana: 'なみだ'}
             ]
         ],
@@ -464,14 +465,16 @@ function testDistributeFurigana() {
         [
             ['くノ一', 'くのいち'],
             [
-                {text: 'くノ', furigana: 'くの'},
+                {text: 'く', furigana: ''},
+                {text: 'ノ', furigana: 'の'},
                 {text: '一', furigana: 'いち'}
             ]
         ],
         [
             ['くノ一', 'くのいち'],
             [
-                {text: 'くノ', furigana: 'くの'},
+                {text: 'く', furigana: ''},
+                {text: 'ノ', furigana: 'の'},
                 {text: '一', furigana: 'いち'}
             ]
         ],
@@ -691,9 +694,19 @@ function testDistributeFurigana() {
         [
             ['ページ違反', 'ぺーじいはん'],
             [
-                {text: 'ページ', furigana: 'ぺーじ'},
+                {text: 'ペ', furigana: 'ぺ'},
+                {text: 'ー', furigana: ''},
+                {text: 'ジ', furigana: 'じ'},
                 {text: '違反', furigana: 'いはん'}
             ]
+        ],
+        // Mismatched kana
+        [
+            ['サボる', 'サボル'],
+            [
+                {text: 'サボ', furigana: ''},
+                {text: 'る', furigana: 'ル'}
+            ]
         ]
     ];
 
-- 
cgit v1.2.3