diff options
Diffstat (limited to 'ext')
-rw-r--r-- | ext/js/background/backend.js | 13 | ||||
-rw-r--r-- | ext/js/background/offscreen-proxy.js | 6 | ||||
-rw-r--r-- | ext/js/background/offscreen.js | 4 | ||||
-rw-r--r-- | ext/js/language/en/english-transforms.json | 39 | ||||
-rw-r--r-- | ext/js/language/ja/japanese-transforms.json (renamed from ext/data/language/japanese-transforms.json) | 0 | ||||
-rw-r--r-- | ext/js/language/language-descriptors.js | 6 | ||||
-rw-r--r-- | ext/js/language/language-transformer.js | 50 | ||||
-rwxr-xr-x | ext/js/language/languages.js | 4 | ||||
-rw-r--r-- | ext/js/language/multi-language-transformer.js | 77 | ||||
-rw-r--r-- | ext/js/language/translator.js | 34 |
10 files changed, 181 insertions, 52 deletions
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 6340d021..dbcbdd62 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -274,9 +274,16 @@ export class Backend { log.error(e); } - /** @type {import('language-transformer').LanguageTransformDescriptor} */ - const descriptor = await fetchJson('/data/language/japanese-transforms.json'); - void this._translator.prepare(descriptor); + /** @type {import('language-transformer').LanguageTransformDescriptor[]} */ + const descriptors = []; + const languageSummaries = getLanguageSummaries(); + for (const {languageTransformsFile} of languageSummaries) { + if (!languageTransformsFile) { continue; } + /** @type {import('language-transformer').LanguageTransformDescriptor} */ + const descriptor = await fetchJson(languageTransformsFile); + descriptors.push(descriptor); + } + void this._translator.prepare(descriptors); await this._optionsUtil.prepare(); this._defaultAnkiFieldTemplates = (await fetchText('/data/templates/default-anki-field-templates.handlebars')).trim(); diff --git a/ext/js/background/offscreen-proxy.js b/ext/js/background/offscreen-proxy.js index c888fe29..102a9eed 100644 --- a/ext/js/background/offscreen-proxy.js +++ b/ext/js/background/offscreen-proxy.js @@ -184,10 +184,10 @@ export class TranslatorProxy { } /** - * @param {import('language-transformer').LanguageTransformDescriptor} descriptor + * @param {import('language-transformer').LanguageTransformDescriptor[]} descriptors */ - async prepare(descriptor) { - await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptor}}); + async prepare(descriptors) { + await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptors}}); } /** diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js index 3b8b6a3e..e38d4ba3 100644 --- a/ext/js/background/offscreen.js +++ b/ext/js/background/offscreen.js @@ -111,8 +111,8 @@ export class Offscreen { } /** @type {import('offscreen').ApiHandler<'translatorPrepareOffscreen'>} */ - _prepareTranslatorHandler({descriptor}) { - this._translator.prepare(descriptor); + _prepareTranslatorHandler({descriptors}) { + this._translator.prepare(descriptors); } /** @type {import('offscreen').ApiHandler<'findKanjiOffscreen'>} */ diff --git a/ext/js/language/en/english-transforms.json b/ext/js/language/en/english-transforms.json new file mode 100644 index 00000000..623a4ddf --- /dev/null +++ b/ext/js/language/en/english-transforms.json @@ -0,0 +1,39 @@ +{ + "language": "en", + "conditions": { + "v": { + "name": "Verb", + "isDictionaryForm": true + }, + "n": { + "name": "Noun", + "isDictionaryForm": true, + "subConditions": ["np", "ns"] + }, + "np": { + "name": "Noun plural", + "isDictionaryForm": true + }, + "ns": { + "name": "Noun singular", + "isDictionaryForm": true + }, + "adj": { + "name": "Adjective", + "isDictionaryForm": true + } + }, + "transforms": [ + { + "name": "plural", + "description": "Plural form of a noun", + "rules": [ + {"suffixIn": "s", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "es", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "ies", "suffixOut": "y", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "ves", "suffixOut": "fe", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, + {"suffixIn": "ves", "suffixOut": "f", "conditionsIn": ["np"], "conditionsOut": ["ns"]} + ] + } + ] +} diff --git a/ext/data/language/japanese-transforms.json b/ext/js/language/ja/japanese-transforms.json index 0812f01e..0812f01e 100644 --- a/ext/data/language/japanese-transforms.json +++ b/ext/js/language/ja/japanese-transforms.json diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index d78a96e5..6675f78e 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -57,7 +57,8 @@ const languageDescriptors = [ iso: 'en', name: 'English', exampleText: 'read', - textPreprocessors: capitalizationPreprocessors + textPreprocessors: capitalizationPreprocessors, + languageTransformsFile: '/js/language/en/english-transforms.json' }, { iso: 'es', @@ -122,7 +123,8 @@ const languageDescriptors = [ convertAlphabeticCharacters, convertHiraganaToKatakana, collapseEmphaticSequences - } + }, + languageTransformsFile: '/js/language/ja/japanese-transforms.json' }, { iso: 'km', diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js index 08a2166f..8a82e4d2 100644 --- a/ext/js/language/language-transformer.js +++ b/ext/js/language/language-transformer.js @@ -77,11 +77,11 @@ export class LanguageTransformer { this._transforms.push(transform); } - for (const [type, condition] of conditionEntries) { + for (const [type, {isDictionaryForm}] of conditionEntries) { const flags = conditionFlagsMap.get(type); if (typeof flags === 'undefined') { continue; } // This case should never happen this._conditionTypeToConditionFlagsMap.set(type, flags); - if (condition.isDictionaryForm) { + if (isDictionaryForm) { this._partOfSpeechToConditionFlagsMap.set(type, flags); } } @@ -116,7 +116,7 @@ export class LanguageTransformer { * @returns {import('language-transformer-internal').TransformedText[]} */ transform(sourceText) { - const results = [this._createTransformedText(sourceText, 0, [])]; + const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])]; for (let i = 0; i < results.length; ++i) { const {text, conditions, trace} = results[i]; for (const transform of this._transforms) { @@ -128,7 +128,7 @@ export class LanguageTransformer { if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; } const {suffixIn, suffixOut} = rule; if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; } - results.push(this._createTransformedText( + results.push(LanguageTransformer.createTransformedText( text.substring(0, text.length - suffixIn.length) + suffixOut, rule.conditionsOut, this._extendTrace(trace, {transform: name, ruleIndex: j}) @@ -140,6 +140,27 @@ export class LanguageTransformer { } /** + * @param {string} text + * @param {number} conditions + * @param {import('language-transformer-internal').Trace} trace + * @returns {import('language-transformer-internal').TransformedText} + */ + static createTransformedText(text, conditions, trace) { + return {text, conditions, trace}; + } + + /** + * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. + * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. + * @param {number} currentConditions + * @param {number} nextConditions + * @returns {boolean} + */ + static conditionsMatch(currentConditions, nextConditions) { + return currentConditions === 0 || (currentConditions & nextConditions) !== 0; + } + + /** * @param {import('language-transformer').ConditionMapEntries} conditions * @param {number} nextFlagIndex * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}} @@ -218,16 +239,6 @@ export class LanguageTransformer { } /** - * @param {string} text - * @param {number} conditions - * @param {import('language-transformer-internal').Trace} trace - * @returns {import('language-transformer-internal').TransformedText} - */ - _createTransformedText(text, conditions, trace) { - return {text, conditions, trace}; - } - - /** * @param {import('language-transformer-internal').Trace} trace * @param {import('language-transformer-internal').TraceFrame} newFrame * @returns {import('language-transformer-internal').Trace} @@ -239,15 +250,4 @@ export class LanguageTransformer { } return newTrace; } - - /** - * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. - * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. - * @param {number} currentConditions - * @param {number} nextConditions - * @returns {boolean} - */ - static conditionsMatch(currentConditions, nextConditions) { - return currentConditions === 0 || (currentConditions & nextConditions) !== 0; - } } diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index fd58477d..63bbc791 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -22,8 +22,8 @@ import {languageDescriptorMap} from './language-descriptors.js'; */ export function getLanguageSummaries() { const results = []; - for (const {name, iso, exampleText} of languageDescriptorMap.values()) { - results.push({name, iso, exampleText}); + for (const {name, iso, exampleText, languageTransformsFile} of languageDescriptorMap.values()) { + results.push({name, iso, exampleText, languageTransformsFile}); } return results; } diff --git a/ext/js/language/multi-language-transformer.js b/ext/js/language/multi-language-transformer.js new file mode 100644 index 00000000..692e1718 --- /dev/null +++ b/ext/js/language/multi-language-transformer.js @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {LanguageTransformer} from './language-transformer.js'; + +export class MultiLanguageTransformer { + constructor() { + /** @type {Map<string, LanguageTransformer>} */ + this._languageTransformers = new Map(); + } + + /** + * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors + */ + prepare(languageTransformDescriptors) { + for (const descriptor of languageTransformDescriptors) { + const languageTransformer = new LanguageTransformer(); + languageTransformer.addDescriptor(descriptor); + this._languageTransformers.set(descriptor.language, languageTransformer); + } + } + + /** + * @param {string} language + * @param {string[]} partsOfSpeech + * @returns {number} + */ + getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) { + const languageTransformer = this._languageTransformers.get(language); + return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0; + } + + /** + * @param {string} language + * @param {string[]} conditionTypes + * @returns {number} + */ + getConditionFlagsFromConditionTypes(language, conditionTypes) { + const languageTransformer = this._languageTransformers.get(language); + return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0; + } + + /** + * @param {string} language + * @param {string} conditionType + * @returns {number} + */ + getConditionFlagsFromConditionType(language, conditionType) { + const languageTransformer = this._languageTransformers.get(language); + return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0; + } + + /** + * @param {string} language + * @param {string} sourceText + * @returns {import('language-transformer-internal').TransformedText[]} + */ + transform(language, sourceText) { + const languageTransformer = this._languageTransformers.get(language); + if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; } + return languageTransformer.transform(sourceText); + } +} diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index b10af226..04319e60 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -21,6 +21,7 @@ import {TextSourceMap} from '../general/text-source-map.js'; import {isCodePointJapanese} from './ja/japanese.js'; import {LanguageTransformer} from './language-transformer.js'; import {getAllLanguageTextPreprocessors} from './languages.js'; +import {MultiLanguageTransformer} from './multi-language-transformer.js'; /** * Class which finds term and kanji dictionary entries for text. @@ -32,8 +33,8 @@ export class Translator { constructor(database) { /** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */ this._database = database; - /** @type {LanguageTransformer} */ - this._languageTransformer = new LanguageTransformer(); + /** @type {MultiLanguageTransformer} */ + this._multiLanguageTransformer = new MultiLanguageTransformer(); /** @type {import('translator').DictionaryTagCache} */ this._tagCache = new Map(); /** @type {Intl.Collator} */ @@ -46,10 +47,10 @@ export class Translator { /** * Initializes the instance for use. The public API should not be used until this function has been called. - * @param {import('language-transformer').LanguageTransformDescriptor} descriptor + * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors */ - prepare(descriptor) { - this._languageTransformer.addDescriptor(descriptor); + prepare(languageTransformDescriptors) { + this._multiLanguageTransformer.prepare(languageTransformDescriptors); for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) { /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ const optionSpace = new Map(); @@ -316,11 +317,11 @@ export class Translator { ); if (deinflections.length === 0) { return []; } - const {matchType} = options; + const {matchType, language} = options; - await this._addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType); + await this._addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType); - const dictionaryDeinflections = await this._getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType); + const dictionaryDeinflections = await this._getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType); deinflections.push(...dictionaryDeinflections); for (const deinflection of deinflections) { @@ -335,12 +336,13 @@ export class Translator { } /** + * @param {string} language * @param {import('translation-internal').DatabaseDeinflection[]} deinflections * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap * @param {import('dictionary').TermSourceMatchType} matchType * @returns {Promise<import('translation-internal').DatabaseDeinflection[]>} */ - async _getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType) { + async _getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType) { /** @type {import('translation-internal').DatabaseDeinflection[]} */ const dictionaryDeinflections = []; for (const deinflection of deinflections) { @@ -369,23 +371,24 @@ export class Translator { } } - await this._addEntriesToDeinflections(dictionaryDeinflections, enabledDictionaryMap, matchType); + await this._addEntriesToDeinflections(language, dictionaryDeinflections, enabledDictionaryMap, matchType); return dictionaryDeinflections; } /** + * @param {string} language * @param {import('translation-internal').DatabaseDeinflection[]} deinflections * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap * @param {import('dictionary').TermSourceMatchType} matchType */ - async _addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType) { + async _addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType) { const uniqueDeinflectionsMap = this._groupDeinflectionsByTerm(deinflections); const uniqueDeinflectionArrays = [...uniqueDeinflectionsMap.values()]; const uniqueDeinflectionTerms = [...uniqueDeinflectionsMap.keys()]; const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, matchType); - this._matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap); + this._matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap); } /** @@ -407,16 +410,17 @@ export class Translator { } /** + * @param {string} language * @param {import('dictionary-database').TermEntry[]} databaseEntries * @param {import('translation-internal').DatabaseDeinflection[][]} uniqueDeinflectionArrays * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap */ - _matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) { + _matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) { for (const databaseEntry of databaseEntries) { const entryDictionary = /** @type {import('translation').FindTermDictionary} */ (enabledDictionaryMap.get(databaseEntry.dictionary)); const {partsOfSpeechFilter} = entryDictionary; - const definitionConditions = this._languageTransformer.getConditionFlagsFromPartsOfSpeech(databaseEntry.rules); + const definitionConditions = this._multiLanguageTransformer.getConditionFlagsFromPartsOfSpeech(language, databaseEntry.rules); for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) { if (!partsOfSpeechFilter || LanguageTransformer.conditionsMatch(deinflection.conditions, definitionConditions)) { deinflection.databaseEntries.push(databaseEntry); @@ -475,7 +479,7 @@ export class Translator { if (used.has(source)) { break; } used.add(source); const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); - for (const {text: transformedText, conditions, trace} of this._languageTransformer.transform(source)) { + for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) { /** @type {import('dictionary').InflectionRuleChainCandidate} */ const inflectionRuleChainCandidate = { source: 'algorithm', |