diff options
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/js/background/backend.js | 13 | ||||
| -rw-r--r-- | ext/js/background/offscreen-proxy.js | 6 | ||||
| -rw-r--r-- | ext/js/background/offscreen.js | 4 | ||||
| -rw-r--r-- | ext/js/language/en/english-transforms.json | 39 | ||||
| -rw-r--r-- | ext/js/language/ja/japanese-transforms.json (renamed from ext/data/language/japanese-transforms.json) | 0 | ||||
| -rw-r--r-- | ext/js/language/language-descriptors.js | 6 | ||||
| -rw-r--r-- | ext/js/language/language-transformer.js | 50 | ||||
| -rwxr-xr-x | ext/js/language/languages.js | 4 | ||||
| -rw-r--r-- | ext/js/language/multi-language-transformer.js | 77 | ||||
| -rw-r--r-- | ext/js/language/translator.js | 34 | 
10 files changed, 181 insertions, 52 deletions
| diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 6340d021..dbcbdd62 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -274,9 +274,16 @@ export class Backend {                  log.error(e);              } -            /** @type {import('language-transformer').LanguageTransformDescriptor} */ -            const descriptor = await fetchJson('/data/language/japanese-transforms.json'); -            void this._translator.prepare(descriptor); +            /** @type {import('language-transformer').LanguageTransformDescriptor[]} */ +            const descriptors = []; +            const languageSummaries = getLanguageSummaries(); +            for (const {languageTransformsFile} of languageSummaries) { +                if (!languageTransformsFile) { continue; } +                /** @type {import('language-transformer').LanguageTransformDescriptor} */ +                const descriptor = await fetchJson(languageTransformsFile); +                descriptors.push(descriptor); +            } +            void this._translator.prepare(descriptors);              await this._optionsUtil.prepare();              this._defaultAnkiFieldTemplates = (await fetchText('/data/templates/default-anki-field-templates.handlebars')).trim(); diff --git a/ext/js/background/offscreen-proxy.js b/ext/js/background/offscreen-proxy.js index c888fe29..102a9eed 100644 --- a/ext/js/background/offscreen-proxy.js +++ b/ext/js/background/offscreen-proxy.js @@ -184,10 +184,10 @@ export class TranslatorProxy {      }      /** -     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor +     * @param {import('language-transformer').LanguageTransformDescriptor[]} descriptors       */ -    async prepare(descriptor) { -        await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptor}}); +    async prepare(descriptors) { +        await this._offscreen.sendMessagePromise({action: 'translatorPrepareOffscreen', params: {descriptors}});      }      /** diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js index 3b8b6a3e..e38d4ba3 100644 --- a/ext/js/background/offscreen.js +++ b/ext/js/background/offscreen.js @@ -111,8 +111,8 @@ export class Offscreen {      }      /** @type {import('offscreen').ApiHandler<'translatorPrepareOffscreen'>} */ -    _prepareTranslatorHandler({descriptor}) { -        this._translator.prepare(descriptor); +    _prepareTranslatorHandler({descriptors}) { +        this._translator.prepare(descriptors);      }      /** @type {import('offscreen').ApiHandler<'findKanjiOffscreen'>} */ diff --git a/ext/js/language/en/english-transforms.json b/ext/js/language/en/english-transforms.json new file mode 100644 index 00000000..623a4ddf --- /dev/null +++ b/ext/js/language/en/english-transforms.json @@ -0,0 +1,39 @@ +{ +    "language": "en", +    "conditions": { +        "v": { +            "name": "Verb", +            "isDictionaryForm": true +        }, +        "n": { +            "name": "Noun", +            "isDictionaryForm": true, +            "subConditions": ["np", "ns"] +        }, +        "np": { +            "name": "Noun plural", +            "isDictionaryForm": true +        }, +        "ns": { +            "name": "Noun singular", +            "isDictionaryForm": true +        }, +        "adj": { +            "name": "Adjective", +            "isDictionaryForm": true +        } +    }, +    "transforms": [ +        { +            "name": "plural", +            "description": "Plural form of a noun", +            "rules": [ +                {"suffixIn": "s", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, +                {"suffixIn": "es", "suffixOut": "", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, +                {"suffixIn": "ies", "suffixOut": "y", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, +                {"suffixIn": "ves", "suffixOut": "fe", "conditionsIn": ["np"], "conditionsOut": ["ns"]}, +                {"suffixIn": "ves", "suffixOut": "f", "conditionsIn": ["np"], "conditionsOut": ["ns"]} +            ] +        } +    ] +} diff --git a/ext/data/language/japanese-transforms.json b/ext/js/language/ja/japanese-transforms.json index 0812f01e..0812f01e 100644 --- a/ext/data/language/japanese-transforms.json +++ b/ext/js/language/ja/japanese-transforms.json diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index d78a96e5..6675f78e 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -57,7 +57,8 @@ const languageDescriptors = [          iso: 'en',          name: 'English',          exampleText: 'read', -        textPreprocessors: capitalizationPreprocessors +        textPreprocessors: capitalizationPreprocessors, +        languageTransformsFile: '/js/language/en/english-transforms.json'      },      {          iso: 'es', @@ -122,7 +123,8 @@ const languageDescriptors = [              convertAlphabeticCharacters,              convertHiraganaToKatakana,              collapseEmphaticSequences -        } +        }, +        languageTransformsFile: '/js/language/ja/japanese-transforms.json'      },      {          iso: 'km', diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js index 08a2166f..8a82e4d2 100644 --- a/ext/js/language/language-transformer.js +++ b/ext/js/language/language-transformer.js @@ -77,11 +77,11 @@ export class LanguageTransformer {              this._transforms.push(transform);          } -        for (const [type, condition] of conditionEntries) { +        for (const [type, {isDictionaryForm}] of conditionEntries) {              const flags = conditionFlagsMap.get(type);              if (typeof flags === 'undefined') { continue; } // This case should never happen              this._conditionTypeToConditionFlagsMap.set(type, flags); -            if (condition.isDictionaryForm) { +            if (isDictionaryForm) {                  this._partOfSpeechToConditionFlagsMap.set(type, flags);              }          } @@ -116,7 +116,7 @@ export class LanguageTransformer {       * @returns {import('language-transformer-internal').TransformedText[]}       */      transform(sourceText) { -        const results = [this._createTransformedText(sourceText, 0, [])]; +        const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];          for (let i = 0; i < results.length; ++i) {              const {text, conditions, trace} = results[i];              for (const transform of this._transforms) { @@ -128,7 +128,7 @@ export class LanguageTransformer {                      if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }                      const {suffixIn, suffixOut} = rule;                      if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; } -                    results.push(this._createTransformedText( +                    results.push(LanguageTransformer.createTransformedText(                          text.substring(0, text.length - suffixIn.length) + suffixOut,                          rule.conditionsOut,                          this._extendTrace(trace, {transform: name, ruleIndex: j}) @@ -140,6 +140,27 @@ export class LanguageTransformer {      }      /** +     * @param {string} text +     * @param {number} conditions +     * @param {import('language-transformer-internal').Trace} trace +     * @returns {import('language-transformer-internal').TransformedText} +     */ +    static createTransformedText(text, conditions, trace) { +        return {text, conditions, trace}; +    } + +    /** +     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. +     * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. +     * @param {number} currentConditions +     * @param {number} nextConditions +     * @returns {boolean} +     */ +    static conditionsMatch(currentConditions, nextConditions) { +        return currentConditions === 0 || (currentConditions & nextConditions) !== 0; +    } + +    /**       * @param {import('language-transformer').ConditionMapEntries} conditions       * @param {number} nextFlagIndex       * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}} @@ -218,16 +239,6 @@ export class LanguageTransformer {      }      /** -     * @param {string} text -     * @param {number} conditions -     * @param {import('language-transformer-internal').Trace} trace -     * @returns {import('language-transformer-internal').TransformedText} -     */ -    _createTransformedText(text, conditions, trace) { -        return {text, conditions, trace}; -    } - -    /**       * @param {import('language-transformer-internal').Trace} trace       * @param {import('language-transformer-internal').TraceFrame} newFrame       * @returns {import('language-transformer-internal').Trace} @@ -239,15 +250,4 @@ export class LanguageTransformer {          }          return newTrace;      } - -    /** -     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. -     * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. -     * @param {number} currentConditions -     * @param {number} nextConditions -     * @returns {boolean} -     */ -    static conditionsMatch(currentConditions, nextConditions) { -        return currentConditions === 0 || (currentConditions & nextConditions) !== 0; -    }  } diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index fd58477d..63bbc791 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -22,8 +22,8 @@ import {languageDescriptorMap} from './language-descriptors.js';   */  export function getLanguageSummaries() {      const results = []; -    for (const {name, iso, exampleText} of languageDescriptorMap.values()) { -        results.push({name, iso, exampleText}); +    for (const {name, iso, exampleText, languageTransformsFile} of languageDescriptorMap.values()) { +        results.push({name, iso, exampleText, languageTransformsFile});      }      return results;  } diff --git a/ext/js/language/multi-language-transformer.js b/ext/js/language/multi-language-transformer.js new file mode 100644 index 00000000..692e1718 --- /dev/null +++ b/ext/js/language/multi-language-transformer.js @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {LanguageTransformer} from './language-transformer.js'; + +export class MultiLanguageTransformer { +    constructor() { +        /** @type {Map<string, LanguageTransformer>} */ +        this._languageTransformers = new Map(); +    } + +    /** +     * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors +     */ +    prepare(languageTransformDescriptors) { +        for (const descriptor of languageTransformDescriptors) { +            const languageTransformer = new LanguageTransformer(); +            languageTransformer.addDescriptor(descriptor); +            this._languageTransformers.set(descriptor.language, languageTransformer); +        } +    } + +    /** +     * @param {string} language +     * @param {string[]} partsOfSpeech +     * @returns {number} +     */ +    getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) { +        const languageTransformer = this._languageTransformers.get(language); +        return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0; +    } + +    /** +     * @param {string} language +     * @param {string[]} conditionTypes +     * @returns {number} +     */ +    getConditionFlagsFromConditionTypes(language, conditionTypes) { +        const languageTransformer = this._languageTransformers.get(language); +        return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0; +    } + +    /** +     * @param {string} language +     * @param {string} conditionType +     * @returns {number} +     */ +    getConditionFlagsFromConditionType(language, conditionType) { +        const languageTransformer = this._languageTransformers.get(language); +        return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0; +    } + +    /** +     * @param {string} language +     * @param {string} sourceText +     * @returns {import('language-transformer-internal').TransformedText[]} +     */ +    transform(language, sourceText) { +        const languageTransformer = this._languageTransformers.get(language); +        if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; } +        return languageTransformer.transform(sourceText); +    } +} diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index b10af226..04319e60 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -21,6 +21,7 @@ import {TextSourceMap} from '../general/text-source-map.js';  import {isCodePointJapanese} from './ja/japanese.js';  import {LanguageTransformer} from './language-transformer.js';  import {getAllLanguageTextPreprocessors} from './languages.js'; +import {MultiLanguageTransformer} from './multi-language-transformer.js';  /**   * Class which finds term and kanji dictionary entries for text. @@ -32,8 +33,8 @@ export class Translator {      constructor(database) {          /** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */          this._database = database; -        /** @type {LanguageTransformer} */ -        this._languageTransformer = new LanguageTransformer(); +        /** @type {MultiLanguageTransformer} */ +        this._multiLanguageTransformer = new MultiLanguageTransformer();          /** @type {import('translator').DictionaryTagCache} */          this._tagCache = new Map();          /** @type {Intl.Collator} */ @@ -46,10 +47,10 @@ export class Translator {      /**       * Initializes the instance for use. The public API should not be used until this function has been called. -     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor +     * @param {import('language-transformer').LanguageTransformDescriptor[]} languageTransformDescriptors       */ -    prepare(descriptor) { -        this._languageTransformer.addDescriptor(descriptor); +    prepare(languageTransformDescriptors) { +        this._multiLanguageTransformer.prepare(languageTransformDescriptors);          for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) {              /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */              const optionSpace = new Map(); @@ -316,11 +317,11 @@ export class Translator {          );          if (deinflections.length === 0) { return []; } -        const {matchType} = options; +        const {matchType, language} = options; -        await this._addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType); +        await this._addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType); -        const dictionaryDeinflections = await this._getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType); +        const dictionaryDeinflections = await this._getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType);          deinflections.push(...dictionaryDeinflections);          for (const deinflection of deinflections) { @@ -335,12 +336,13 @@ export class Translator {      }      /** +     * @param {string} language       * @param {import('translation-internal').DatabaseDeinflection[]} deinflections       * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap       * @param {import('dictionary').TermSourceMatchType} matchType       * @returns {Promise<import('translation-internal').DatabaseDeinflection[]>}       */ -    async _getDictionaryDeinflections(deinflections, enabledDictionaryMap, matchType) { +    async _getDictionaryDeinflections(language, deinflections, enabledDictionaryMap, matchType) {          /** @type {import('translation-internal').DatabaseDeinflection[]} */          const dictionaryDeinflections = [];          for (const deinflection of deinflections) { @@ -369,23 +371,24 @@ export class Translator {              }          } -        await this._addEntriesToDeinflections(dictionaryDeinflections, enabledDictionaryMap, matchType); +        await this._addEntriesToDeinflections(language, dictionaryDeinflections, enabledDictionaryMap, matchType);          return dictionaryDeinflections;      }      /** +     * @param {string} language       * @param {import('translation-internal').DatabaseDeinflection[]} deinflections       * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap       * @param {import('dictionary').TermSourceMatchType} matchType       */ -    async _addEntriesToDeinflections(deinflections, enabledDictionaryMap, matchType) { +    async _addEntriesToDeinflections(language, deinflections, enabledDictionaryMap, matchType) {          const uniqueDeinflectionsMap = this._groupDeinflectionsByTerm(deinflections);          const uniqueDeinflectionArrays = [...uniqueDeinflectionsMap.values()];          const uniqueDeinflectionTerms = [...uniqueDeinflectionsMap.keys()];          const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, matchType); -        this._matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap); +        this._matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap);      }      /** @@ -407,16 +410,17 @@ export class Translator {      }      /** +     * @param {string} language       * @param {import('dictionary-database').TermEntry[]} databaseEntries       * @param {import('translation-internal').DatabaseDeinflection[][]} uniqueDeinflectionArrays       * @param {Map<string, import('translation').FindTermDictionary>} enabledDictionaryMap       */ -    _matchEntriesToDeinflections(databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) { +    _matchEntriesToDeinflections(language, databaseEntries, uniqueDeinflectionArrays, enabledDictionaryMap) {          for (const databaseEntry of databaseEntries) {              const entryDictionary = /** @type {import('translation').FindTermDictionary} */ (enabledDictionaryMap.get(databaseEntry.dictionary));              const {partsOfSpeechFilter} = entryDictionary; -            const definitionConditions = this._languageTransformer.getConditionFlagsFromPartsOfSpeech(databaseEntry.rules); +            const definitionConditions = this._multiLanguageTransformer.getConditionFlagsFromPartsOfSpeech(language, databaseEntry.rules);              for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) {                  if (!partsOfSpeechFilter || LanguageTransformer.conditionsMatch(deinflection.conditions, definitionConditions)) {                      deinflection.databaseEntries.push(databaseEntry); @@ -475,7 +479,7 @@ export class Translator {                  if (used.has(source)) { break; }                  used.add(source);                  const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); -                for (const {text: transformedText, conditions, trace} of this._languageTransformer.transform(source)) { +                for (const {text: transformedText, conditions, trace} of this._multiLanguageTransformer.transform(language, source)) {                      /** @type {import('dictionary').InflectionRuleChainCandidate} */                      const inflectionRuleChainCandidate = {                          source: 'algorithm', |