From 445f87ebdb01badd5fd62a44bb165e23351e10c6 Mon Sep 17 00:00:00 2001
From: toasted-nutbread
Date: Sun, 28 Feb 2021 13:26:34 -0500
Subject: Get categorization of pitch accents (#1462)

---
 ext/js/display/display-generator.js | 30 +++++++++++++++++++++++++++++-
 ext/js/language/japanese-util.js    | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

(limited to 'ext/js')

diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js
index 32059d86..91dc0862 100644
--- a/ext/js/display/display-generator.js
+++ b/ext/js/display/display-generator.js
@@ -229,7 +229,7 @@ class DisplayGenerator {
     // Private
 
     _createTermExpression(details) {
-        const {termFrequency, furiganaSegments, expression, reading, termTags} = details;
+        const {termFrequency, furiganaSegments, expression, reading, termTags, pitches} = details;
 
         const searchQueries = [];
         if (expression) { searchQueries.push(expression); }
@@ -243,6 +243,11 @@ class DisplayGenerator {
         node.dataset.readingIsSame = `${!reading || reading === expression}`;
         node.dataset.frequency = termFrequency;
 
+        const pitchAccentCategories = this._getPitchAccentCategories(pitches);
+        if (pitchAccentCategories !== null) {
+            node.dataset.pitchAccentCategories = pitchAccentCategories;
+        }
+
         this._setTextContent(node.querySelector('.expression-reading'), reading.length > 0 ? reading : expression);
 
         this._appendFurigana(expressionContainer, furiganaSegments, this._appendKanjiLinks.bind(this));
@@ -716,4 +721,27 @@ class DisplayGenerator {
             node.lang = 'ja';
         }
     }
+
+    /**
+     * Collects the distinct pitch accent categories found in a term's pitch data.
+     * @param {Array<{reading: string, pitches: Array<{position: number}>}>} pitches
+     *   Pitch accent groups (shape inferred from the destructuring below).
+     * @returns {?string} Space-separated category names, or null if there are none.
+     */
+    _getPitchAccentCategories(pitches) {
+        if (pitches.length === 0) { return null; }
+        // A Set keeps each category once even when several entries share it.
+        const categories = new Set();
+        for (const {reading, pitches: pitches2} of pitches) {
+            for (const {position} of pitches2) {
+                // NOTE(review): isVerbOrAdjective is always passed as false, so
+                // 'kifuku' is never produced here - confirm this is intended.
+                const category = this._japaneseUtil.getPitchCategory(reading, position, false);
+                if (category !== null) {
+                    categories.add(category);
+                }
+            }
+        }
+        return categories.size > 0 ? [...categories].join(' ') : null;
+    }
 }
diff --git a/ext/js/language/japanese-util.js b/ext/js/language/japanese-util.js
index e47cdf55..4d317396 100644
--- a/ext/js/language/japanese-util.js
+++ b/ext/js/language/japanese-util.js
@@ -232,6 +232,30 @@ const JapaneseUtil = (() => {
             }
         }
 
+        /**
+         * Classifies a pitch accent position into a named category.
+         * @param {string} text Text whose mora count decides 'odaka' vs. 'nakadaka'.
+         * @param {number} pitchAccentPosition Accent position; 0 yields 'heiban'.
+         * @param {boolean} isVerbOrAdjective When true, any positive position is 'kifuku'.
+         * @returns {?string} 'heiban', 'kifuku', 'atamadaka', 'odaka' or 'nakadaka',
+         *   or null when no branch matches (e.g. a negative position).
+         */
+        getPitchCategory(text, pitchAccentPosition, isVerbOrAdjective) {
+            if (pitchAccentPosition === 0) {
+                return 'heiban';
+            }
+            if (isVerbOrAdjective) {
+                return pitchAccentPosition > 0 ? 'kifuku' : null;
+            }
+            if (pitchAccentPosition === 1) {
+                return 'atamadaka';
+            }
+            if (pitchAccentPosition > 1) {
+                return pitchAccentPosition >= this.getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
+            }
+            return null;
+        }
+
         getKanaMorae(text) {
             const morae = [];
             let i;
@@ -245,6 +269,23 @@ const JapaneseUtil = (() => {
             return morae;
         }
 
+        /**
+         * Counts the morae in a string.
+         * A character in SMALL_KANA_SET is folded into the preceding mora, unless
+         * it is the first character, in which case it is counted on its own.
+         * @param {string} text The text to measure; iterated with for...of.
+         * @returns {number} The number of morae counted.
+         */
+        getKanaMoraCount(text) {
+            let moraCount = 0;
+            for (const c of text) {
+                if (!(SMALL_KANA_SET.has(c) && moraCount > 0)) {
+                    ++moraCount;
+                }
+            }
+            return moraCount;
+        }
+
         // Conversion functions
 
         convertToKana(text) {
--
cgit v1.2.3