From 93f7278586f7b943ae49c00cd14559a2f4b99561 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 23 Feb 2020 14:03:37 -0500 Subject: Update dictionary schema to support pitch accent data --- .../dictionaries/valid-dictionary1/tag_bank_3.json | 4 +++ .../valid-dictionary1/term_meta_bank_1.json | 36 +++++++++++++++++++++- test/test-database.js | 9 +++--- 3 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 test/data/dictionaries/valid-dictionary1/tag_bank_3.json (limited to 'test') diff --git a/test/data/dictionaries/valid-dictionary1/tag_bank_3.json b/test/data/dictionaries/valid-dictionary1/tag_bank_3.json new file mode 100644 index 00000000..572221fe --- /dev/null +++ b/test/data/dictionaries/valid-dictionary1/tag_bank_3.json @@ -0,0 +1,4 @@ +[ + ["ptag1", "pcategory1", 0, "ptag1 notes", 0], + ["ptag2", "pcategory2", 0, "ptag2 notes", 0] +] \ No newline at end of file diff --git a/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json index 78096502..26922394 100644 --- a/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_meta_bank_1.json @@ -1,5 +1,39 @@ [ ["打", "freq", 1], ["打つ", "freq", 2], - ["打ち込む", "freq", 3] + ["打ち込む", "freq", 3], + [ + "打ち込む", + "pitch", + { + "reading": "うちこむ", + "pitches": [ + {"position": 0}, + {"position": 3} + ] + } + ], + [ + "打ち込む", + "pitch", + { + "reading": "ぶちこむ", + "pitches": [ + {"position": 0}, + {"position": 3} + ] + } + ], + [ + "お手前", + "pitch", + { + "reading": "おてまえ", + "pitches": [ + {"position": 2, "tags": ["ptag1"]}, + {"position": 2, "tags": ["ptag2"]}, + {"position": 0, "tags": ["ptag2"]} + ] + } + ] ] \ No newline at end of file diff --git a/test/test-database.js b/test/test-database.js index 833aa75d..dbd67257 100644 --- a/test/test-database.js +++ b/test/test-database.js @@ -231,8 +231,8 @@ async function testDatabase1() { true ); vm.assert.deepStrictEqual(counts, { - counts: [{kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 3, tagMeta: 12}], - total: {kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 3, tagMeta: 12} + counts: [{kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 6, tagMeta: 14}], + total: {kanji: 2, kanjiMeta: 2, terms: 32, termMeta: 6, tagMeta: 14} }); // Test find* functions @@ -648,9 +648,10 @@ async function testFindTermMetaBulk1(database, titles) { } ], expectedResults: { - total: 1, + total: 3, modes: [ - ['freq', 1] + ['freq', 1], + ['pitch', 2] ] } }, -- cgit v1.2.3 From 0d80fcdf86745da133e4510eeea809a4eeafe120 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 28 Mar 2020 10:47:02 -0400 Subject: Move Japanese utility functions out of display-generator.js --- ext/mixed/js/display-generator.js | 38 +++++---------------------- ext/mixed/js/japanese.js | 26 +++++++++++++++++- test/test-japanese.js | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 33 deletions(-) (limited to 'test') diff --git a/ext/mixed/js/display-generator.js b/ext/mixed/js/display-generator.js index 326f3f54..90361328 100644 --- a/ext/mixed/js/display-generator.js +++ b/ext/mixed/js/display-generator.js @@ -305,7 +305,7 @@ class DisplayGenerator { createPitch(details) { const {expressions, reading, position, tags} = details; - const morae = DisplayGenerator._jpGetKanaMorae(reading); + const morae = jp.getKanaMorae(reading); const node = this._templateHandler.instantiate('term-pitch-accent'); @@ -324,8 +324,8 @@ class DisplayGenerator { n = node.querySelector('.term-pitch-accent-characters'); for (let i = 0, ii = morae.length; i < ii; ++i) { const mora = morae[i]; - const highPitch = DisplayGenerator._jpIsMoraPitchHigh(i, position); - const highPitchNext = DisplayGenerator._jpIsMoraPitchHigh(i + 1, position); + const highPitch = jp.isMoraPitchHigh(i, position); + const highPitchNext = jp.isMoraPitchHigh(i + 1, position); const n1 = this._templateHandler.instantiate('term-pitch-accent-character'); const n2 = n1.querySelector('.term-pitch-accent-character-inner'); @@ -358,8 +358,8 @@ class DisplayGenerator { const pathPoints = []; for (let i = 0; i < ii; ++i) { - const highPitch = DisplayGenerator._jpIsMoraPitchHigh(i, position); - const highPitchNext = DisplayGenerator._jpIsMoraPitchHigh(i + 1, position); + const highPitch = jp.isMoraPitchHigh(i, position); + const highPitchNext = jp.isMoraPitchHigh(i + 1, position); const graphic = (highPitch && !highPitchNext ? '#term-pitch-accent-graph-dot-downstep' : '#term-pitch-accent-graph-dot'); const x = `${i * 50 + 25}`; const y = highPitch ? '25' : '75'; @@ -376,7 +376,7 @@ class DisplayGenerator { pathPoints.splice(0, ii - 1); { - const highPitch = DisplayGenerator._jpIsMoraPitchHigh(ii, position); + const highPitch = jp.isMoraPitchHigh(ii, position); const x = `${ii * 50 + 25}`; const y = highPitch ? '25' : '75'; const use = document.createElementNS(svgns, 'use'); @@ -532,30 +532,4 @@ class DisplayGenerator { return true; } - - static _jpGetKanaMorae(text) { - // This function splits Japanese kana reading into its individual mora - // components. It is assumed that the text is well-formed. - const smallKanaSet = DisplayGenerator._smallKanaSet; - const morae = []; - let i; - for (const c of text) { - if (smallKanaSet.has(c) && (i = morae.length) > 0) { - morae[i - 1] += c; - } else { - morae.push(c); - } - } - return morae; - } - - static _jpCreateSmallKanaSet() { - return new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ')); - } - - static _jpIsMoraPitchHigh(moraIndex, pitchAccentPosition) { - return pitchAccentPosition === 0 ? (moraIndex > 0) : (moraIndex < pitchAccentPosition); - } } - -DisplayGenerator._smallKanaSet = DisplayGenerator._jpCreateSmallKanaSet(); diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index 61a247b2..e6b9a8a0 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -64,6 +64,8 @@ const jp = (() => { [0xffe0, 0xffee] // Currency markers ]; + const SMALL_KANA_SET = new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ')); + // Character code testing functions @@ -112,6 +114,26 @@ const jp = (() => { } + // Mora functions + + function isMoraPitchHigh(moraIndex, pitchAccentPosition) { + return pitchAccentPosition === 0 ? (moraIndex > 0) : (moraIndex < pitchAccentPosition); + } + + function getKanaMorae(text) { + const morae = []; + let i; + for (const c of text) { + if (SMALL_KANA_SET.has(c) && (i = morae.length) > 0) { + morae[i - 1] += c; + } else { + morae.push(c); + } + } + return morae; + } + + // Exports return { @@ -119,6 +141,8 @@ const jp = (() => { isCodePointKana, isCodePointJapanese, isStringEntirelyKana, - isStringPartiallyJapanese + isStringPartiallyJapanese, + isMoraPitchHigh, + getKanaMorae }; })(); diff --git a/test/test-japanese.js b/test/test-japanese.js index c5d220e7..eab632bf 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -392,6 +392,59 @@ function testDistributeFuriganaInflected() { } } +function testIsMoraPitchHigh() { + const data = [ + [[0, 0], false], + [[1, 0], true], + [[2, 0], true], + [[3, 0], true], + + [[0, 1], true], + [[1, 1], false], + [[2, 1], false], + [[3, 1], false], + + [[0, 2], true], + [[1, 2], true], + [[2, 2], false], + [[3, 2], false], + + [[0, 3], true], + [[1, 3], true], + [[2, 3], true], + [[3, 3], false], + + [[0, 4], true], + [[1, 4], true], + [[2, 4], true], + [[3, 4], true] + ]; + + for (const [[moraIndex, pitchAccentPosition], expected] of data) { + const actual = jp.isMoraPitchHigh(moraIndex, pitchAccentPosition); + assert.strictEqual(actual, expected); + } +} + +function testGetKanaMorae() { + const data = [ + ['かこ', ['か', 'こ']], + ['かっこ', ['か', 'っ', 'こ']], + ['カコ', ['カ', 'コ']], + ['カッコ', ['カ', 'ッ', 'コ']], + ['コート', ['コ', 'ー', 'ト']], + ['ちゃんと', ['ちゃ', 'ん', 'と']], + ['とうきょう', ['と', 'う', 'きょ', 'う']], + ['ぎゅう', ['ぎゅ', 'う']], + ['ディスコ', ['ディ', 'ス', 'コ']] + ]; + + for (const [text, expected] of data) { + const actual = jp.getKanaMorae(text); + vm.assert.deepStrictEqual(actual, expected); + } +} + function main() { testIsCodePointKanji(); @@ -408,6 +461,8 @@ function main() { testConvertAlphabeticToKana(); testDistributeFurigana(); testDistributeFuriganaInflected(); + testIsMoraPitchHigh(); + testGetKanaMorae(); } -- cgit v1.2.3