diff options
Diffstat (limited to 'ext/bg')
-rw-r--r-- | ext/bg/background.html | 1 | ||||
-rw-r--r-- | ext/bg/data/dictionary-term-meta-bank-v3-schema.json | 64 | ||||
-rw-r--r-- | ext/bg/data/options-schema.json | 17 | ||||
-rw-r--r-- | ext/bg/js/dictionary.js | 32 | ||||
-rw-r--r-- | ext/bg/js/japanese.js | 31 | ||||
-rw-r--r-- | ext/bg/js/options.js | 5 | ||||
-rw-r--r-- | ext/bg/js/settings/main.js | 6 | ||||
-rw-r--r-- | ext/bg/js/text-source-map.js | 115 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 52 | ||||
-rw-r--r-- | ext/bg/settings.html | 12 |
10 files changed, 258 insertions, 77 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html index 62802341..afe9c5d1 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -39,6 +39,7 @@ <script src="/bg/js/options.js"></script> <script src="/bg/js/profile-conditions.js"></script> <script src="/bg/js/request.js"></script> + <script src="/bg/js/text-source-map.js"></script> <script src="/bg/js/translator.js"></script> <script src="/bg/js/util.js"></script> <script src="/mixed/js/audio-system.js"></script> diff --git a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json index 1cc0557f..8475db81 100644 --- a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json +++ b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json @@ -13,13 +13,71 @@ }, { "type": "string", - "enum": ["freq"], - "description": "Type of data. \"freq\" corresponds to frequency information." + "enum": ["freq", "pitch"], + "description": "Type of data. \"freq\" corresponds to frequency information; \"pitch\" corresponds to pitch information." }, { - "type": ["string", "number"], "description": "Data for the term/expression." } + ], + "oneOf": [ + { + "items": [ + {}, + {"enum": ["freq"]}, + { + "type": ["string", "number"], + "description": "Frequency information for the term or expression." + } + ] + }, + { + "items": [ + {}, + {"enum": ["pitch"]}, + { + "type": ["object"], + "description": "Pitch accent information for the term or expression.", + "required": [ + "reading", + "pitches" + ], + "additionalProperties": false, + "properties": { + "reading": { + "type": "string", + "description": "Reading for the term or expression." 
+ }, + "pitches": { + "type": "array", + "description": "List of different pitch accent information for the term and reading combination.", + "additionalItems": { + "type": "object", + "required": [ + "position" + ], + "additionalProperties": false, + "properties": { + "position": { + "type": "integer", + "description": "Mora position of the pitch accent downstep. A value of 0 indicates that the word does not have a downstep (heiban).", + "minimum": 0 + }, + "tags": { + "type": "array", + "description": "List of tags for this pitch accent.", + "items": { + "type": "string", + "description": "Tag for this pitch accent. This typically corresponds to a certain type of part of speech." + } + } + } + } + } + } + } + ] + } ] } }
\ No newline at end of file diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index d6207952..cb759b72 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -105,7 +105,10 @@ "customPopupCss", "customPopupOuterCss", "enableWanakana", - "enableClipboardMonitor" + "enableClipboardMonitor", + "showPitchAccentDownstepNotation", + "showPitchAccentPositionNotation", + "showPitchAccentGraph" ], "properties": { "enable": { @@ -227,6 +230,18 @@ "enableClipboardMonitor": { "type": "boolean", "default": false + }, + "showPitchAccentDownstepNotation": { + "type": "boolean", + "default": true + }, + "showPitchAccentPositionNotation": { + "type": "boolean", + "default": true + }, + "showPitchAccentGraph": { + "type": "boolean", + "default": false } } }, diff --git a/ext/bg/js/dictionary.js b/ext/bg/js/dictionary.js index 3dd1d0c1..74bd5a64 100644 --- a/ext/bg/js/dictionary.js +++ b/ext/bg/js/dictionary.js @@ -137,30 +137,6 @@ function dictTermsGroup(definitions, dictionaries) { return dictTermsSort(results); } -function dictAreSetsEqual(set1, set2) { - if (set1.size !== set2.size) { - return false; - } - - for (const value of set1) { - if (!set2.has(value)) { - return false; - } - } - - return true; -} - -function dictGetSetIntersection(set1, set2) { - const result = []; - for (const value of set1) { - if (set2.has(value)) { - result.push(value); - } - } - return result; -} - function dictTermsMergeBySequence(definitions, mainDictionary) { const sequencedDefinitions = new Map(); const nonSequencedDefinitions = []; @@ -281,11 +257,11 @@ function dictTermsMergeByGloss(result, definitions, appendTo=null, mergedIndices const only = []; const expressionSet = definition.expression; const readingSet = definition.reading; - if (!dictAreSetsEqual(expressionSet, resultExpressionSet)) { - only.push(...dictGetSetIntersection(expressionSet, resultExpressionSet)); + if (!areSetsEqual(expressionSet, resultExpressionSet)) { + 
only.push(...getSetIntersection(expressionSet, resultExpressionSet)); } - if (!dictAreSetsEqual(readingSet, resultReadingSet)) { - only.push(...dictGetSetIntersection(readingSet, resultReadingSet)); + if (!areSetsEqual(readingSet, resultReadingSet)) { + only.push(...getSetIntersection(readingSet, resultReadingSet)); } definition.only = only; } diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index c5873cf1..2a2b39fd 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -158,9 +158,8 @@ return result; } - function convertHalfWidthKanaToFullWidth(text, sourceMapping) { + function convertHalfWidthKanaToFullWidth(text, sourceMap=null) { let result = ''; - const hasSourceMapping = Array.isArray(sourceMapping); // This function is safe to use charCodeAt instead of codePointAt, since all // the relevant characters are represented with a single UTF-16 character code. @@ -192,10 +191,8 @@ } } - if (hasSourceMapping && index > 0) { - index = result.length; - const v = sourceMapping.splice(index + 1, 1)[0]; - sourceMapping[index] += v; + if (sourceMap !== null && index > 0) { + sourceMap.combine(result.length, 1); } result += c2; } @@ -203,7 +200,7 @@ return result; } - function convertAlphabeticToKana(text, sourceMapping) { + function convertAlphabeticToKana(text, sourceMap=null) { let part = ''; let result = ''; @@ -222,7 +219,7 @@ c = 0x2d; // '-' } else { if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + result += convertAlphabeticPartToKana(part, sourceMap, result.length); part = ''; } result += char; @@ -232,17 +229,16 @@ } if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + result += convertAlphabeticPartToKana(part, sourceMap, result.length); } return result; } - function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) { + function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) { const result = 
wanakana.toHiragana(text); // Generate source mapping - if (Array.isArray(sourceMapping)) { - if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } + if (sourceMap !== null) { let i = 0; let resultPos = 0; const ii = text.length; @@ -262,18 +258,15 @@ // Merge characters const removals = iNext - i - 1; if (removals > 0) { - let sum = 0; - const vs = sourceMapping.splice(sourceMappingStart + 1, removals); - for (const v of vs) { sum += v; } - sourceMapping[sourceMappingStart] += sum; + sourceMap.combine(sourceMapStart, removals); } - ++sourceMappingStart; + ++sourceMapStart; // Empty elements const additions = resultPosNext - resultPos - 1; for (let j = 0; j < additions; ++j) { - sourceMapping.splice(sourceMappingStart, 0, 0); - ++sourceMappingStart; + sourceMap.insert(sourceMapStart, 0); + ++sourceMapStart; } i = iNext; diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index bd0bbe0e..b36fe812 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -124,7 +124,10 @@ function profileOptionsCreateDefaults() { customPopupCss: '', customPopupOuterCss: '', enableWanakana: true, - enableClipboardMonitor: false + enableClipboardMonitor: false, + showPitchAccentDownstepNotation: true, + showPitchAccentPositionNotation: true, + showPitchAccentGraph: false }, audio: { diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index ebc443df..7caeaea0 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -84,6 +84,9 @@ async function formRead(options) { options.general.popupScalingFactor = parseFloat($('#popup-scaling-factor').val()); options.general.popupScaleRelativeToPageZoom = $('#popup-scale-relative-to-page-zoom').prop('checked'); options.general.popupScaleRelativeToVisualViewport = $('#popup-scale-relative-to-visual-viewport').prop('checked'); + options.general.showPitchAccentDownstepNotation = $('#show-pitch-accent-downstep-notation').prop('checked'); + options.general.showPitchAccentPositionNotation = 
$('#show-pitch-accent-position-notation').prop('checked'); + options.general.showPitchAccentGraph = $('#show-pitch-accent-graph').prop('checked'); options.general.popupTheme = $('#popup-theme').val(); options.general.popupOuterTheme = $('#popup-outer-theme').val(); options.general.customPopupCss = $('#custom-popup-css').val(); @@ -161,6 +164,9 @@ async function formWrite(options) { $('#popup-scaling-factor').val(options.general.popupScalingFactor); $('#popup-scale-relative-to-page-zoom').prop('checked', options.general.popupScaleRelativeToPageZoom); $('#popup-scale-relative-to-visual-viewport').prop('checked', options.general.popupScaleRelativeToVisualViewport); + $('#show-pitch-accent-downstep-notation').prop('checked', options.general.showPitchAccentDownstepNotation); + $('#show-pitch-accent-position-notation').prop('checked', options.general.showPitchAccentPositionNotation); + $('#show-pitch-accent-graph').prop('checked', options.general.showPitchAccentGraph); $('#popup-theme').val(options.general.popupTheme); $('#popup-outer-theme').val(options.general.popupOuterTheme); $('#custom-popup-css').val(options.general.customPopupCss); diff --git a/ext/bg/js/text-source-map.js b/ext/bg/js/text-source-map.js new file mode 100644 index 00000000..24970978 --- /dev/null +++ b/ext/bg/js/text-source-map.js @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2020 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +class TextSourceMap { + constructor(source, mapping=null) { + this._source = source; + this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null); + } + + get source() { + return this._source; + } + + equals(other) { + if (this === other) { + return true; + } + + const source = this._source; + if (!(other instanceof TextSourceMap && source === other._source)) { + return false; + } + + let mapping = this._mapping; + let otherMapping = other._mapping; + if (mapping === null) { + if (otherMapping === null) { + return true; + } + mapping = TextSourceMap._createMapping(source); + } else if (otherMapping === null) { + otherMapping = TextSourceMap._createMapping(source); + } + + const mappingLength = mapping.length; + if (mappingLength !== otherMapping.length) { + return false; + } + + for (let i = 0; i < mappingLength; ++i) { + if (mapping[i] !== otherMapping[i]) { + return false; + } + } + + return true; + } + + getSourceLength(finalLength) { + const mapping = this._mapping; + if (mapping === null) { + return finalLength; + } + + let sourceLength = 0; + for (let i = 0; i < finalLength; ++i) { + sourceLength += mapping[i]; + } + return sourceLength; + } + + combine(index, count) { + if (count <= 0) { return; } + + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + let sum = this._mapping[index]; + const parts = this._mapping.splice(index + 1, count); + for (const part of parts) { + sum += part; + } + this._mapping[index] = sum; + } + + insert(index, ...items) { + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + this._mapping.splice(index, 0, ...items); + } + + static _createMapping(text) { + return new Array(text.length).fill(1); + } + + static _normalizeMapping(mapping) { + const result 
= []; + for (const value of mapping) { + result.push( + (typeof value === 'number' && Number.isFinite(value)) ? + Math.floor(value) : + 0 + ); + } + return result; + } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index df19eee1..27f91c05 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -18,6 +18,7 @@ /* global * Deinflector + * TextSourceMap * dictEnabledSet * dictTagBuildSource * dictTagSanitize @@ -359,17 +360,15 @@ class Translator { const used = new Set(); for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; - let sourceMapping = null; + const sourceMap = new TextSourceMap(text2); if (halfWidth) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping); + text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap); } if (numeric) { text2 = jp.convertNumericToFullWidth(text2); } if (alphabetic) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jp.convertAlphabeticToKana(text2, sourceMapping); + text2 = jp.convertAlphabeticToKana(text2, sourceMap); } if (katakana) { text2 = jp.convertHiraganaToKatakana(text2); @@ -383,7 +382,7 @@ class Translator { if (used.has(text2Substring)) { break; } used.add(text2Substring); for (const deinflection of this.deinflector.deinflect(text2Substring)) { - deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping); + deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); deinflections.push(deinflection); } } @@ -399,25 +398,6 @@ class Translator { } } - static getDeinflectionRawSource(source, length, sourceMapping) { - if (sourceMapping === null) { - return source.substring(0, length); - } - - let result = ''; - let index = 0; - for (let i = 0; i < length; ++i) { - const c = sourceMapping[i]; 
- result += source.substring(index, index + c); - index += c; - } - return result; - } - - static createTextSourceMapping(text) { - return new Array(text.length).fill(1); - } - async findKanji(text, options) { const dictionaries = dictEnabledSet(options); const kanjiUnique = new Set(); @@ -482,6 +462,7 @@ class Translator { // New data term.frequencies = []; + term.pitches = []; } const metas = await this.database.findTermMetaBulk(expressionsUnique, dictionaries); @@ -492,6 +473,13 @@ class Translator { term.frequencies.push({expression, frequency: data, dictionary}); } break; + case 'pitch': + for (const term of termsUnique[index]) { + const pitchData = await this.getPitchData(expression, data, dictionary, term); + if (pitchData === null) { continue; } + term.pitches.push(pitchData); + } + break; } } } @@ -575,6 +563,20 @@ class Translator { return tagMetaList; } + async getPitchData(expression, data, dictionary, term) { + const reading = data.reading; + const termReading = term.reading || expression; + if (reading !== termReading) { return null; } + + const pitches = []; + for (let {position, tags} of data.pitches) { + tags = Array.isArray(tags) ? 
await this.getTagMetaList(tags, dictionary) : []; + pitches.push({position, tags}); + } + + return {reading, pitches, dictionary}; + } + static createExpression(expression, reading, termTags=null, termFrequency=null) { const furiganaSegments = jp.distributeFurigana(expression, reading); return { diff --git a/ext/bg/settings.html b/ext/bg/settings.html index cfe20be4..0b2e4f9c 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -163,6 +163,18 @@ </div> <div class="checkbox options-advanced"> + <label><input type="checkbox" id="show-pitch-accent-downstep-notation"> Show downstep notation for pitch accents</label> + </div> + + <div class="checkbox options-advanced"> + <label><input type="checkbox" id="show-pitch-accent-position-notation"> Show position notation for pitch accents</label> + </div> + + <div class="checkbox options-advanced"> + <label><input type="checkbox" id="show-pitch-accent-graph"> Show graph for pitch accents</label> + </div> + + <div class="checkbox options-advanced"> <label><input type="checkbox" id="show-debug-info"> Show debug information</label> </div> |