diff options
| author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-02-17 02:45:24 +0100 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-17 01:45:24 +0000 | 
| commit | 4aaa9f15d97668203741c1731f15e710ae8b8294 (patch) | |
| tree | d1885f7fbd7d1510a71176597169d6847ae26572 | |
| parent | 4e77741d22778bd09b772fc53f1cbd64107e3d24 (diff) | |
add language select, abstract text transformations (#584)
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
* part1
* frontend done?
* fix tests
* offscreen and type complications
* add tests
* start fixing tests
* keep fixing tests
* fix tests
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
* delete pt
* renames
* add tests
* kebab-case filenames
* lint
* minor fixes
* merge
* fixes
* fix part of comments
* fix more comments
* delete unused types
* comment
* comment
* do backend
* other files
* move fetch utils to own file
* remove extra line
* add extra line
* remove unnecessary export
* simplify folder structure
* remove redundant async
* fix param type in api
* fix language index
* undo changes to cssStyleApplier
* undo changes to utilities.js
* undo changes to utilities.js
* simplify language util
* lint
* undo phantom changes to anki integration
* require textTransformations options
* explicit locale in localeCompare
* punctuate notes
* prefer early exit
* rename LanguageOptionsObjectMap
* rename to textPreprocessor
* tuple with names instead of boolean array
* safe data setting
* optional chaining
* simplify LanguageOptions
* encapsulate languages
* delete language util
* nullable language in text preprocessors controller
* rename transform to process
* remove settings
* make translation advanced again
* remove unused getTextTransformations api call
* comments
* change language types
* RIP flags
* comments
* fix tests
* lint
* Text preprocessor type changes (#10)
* Add types
* Update types
* Simplify type check
* Refactor typing and structuring of language definitions
* lint
* update translator benchmark
* undo markdown changes
* undo markdown changes
* undo markdown changes
* more merge
* simplify language controller
---------
Co-authored-by: toasted-nutbread <toasted-nutbread@users.noreply.github.com>
Co-authored-by: Darius Jahandarie <djahandarie@gmail.com>
32 files changed, 1648 insertions, 387 deletions
| diff --git a/.eslintrc.json b/.eslintrc.json index 361e5f24..a5f0f85e 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -637,9 +637,13 @@                  "ext/js/general/object-property-accessor.js",                  "ext/js/general/regex-util.js",                  "ext/js/general/text-source-map.js", +                "ext/js/language/en/language-english.js",                  "ext/js/language/ja/japanese-wanakana.js",                  "ext/js/language/ja/japanese.js", +                "ext/js/language/ja/language-japanese.js",                  "ext/js/language/language-transformer.js", +                "ext/js/language/languages.js", +                "ext/js/language/text-preprocessors.js",                  "ext/js/language/translator.js",                  "ext/js/media/audio-downloader.js",                  "ext/js/media/media-util.js", diff --git a/benches/translator.bench.js b/benches/translator.bench.js index 1231c31c..8b9b5118 100644 --- a/benches/translator.bench.js +++ b/benches/translator.bench.js @@ -20,8 +20,8 @@ import {fileURLToPath} from 'node:url';  import path from 'path';  import {bench, describe} from 'vitest';  import {parseJson} from '../dev/json.js'; -import {createFindKanjiOptions, createFindTermsOptions} from '../test/utilities/translator.js';  import {createTranslatorContext} from '../test/fixtures/translator-test.js'; +import {createFindKanjiOptions, createFindTermsOptions} from '../test/utilities/translator.js';  const dirname = path.dirname(fileURLToPath(import.meta.url));  const dictionaryName = 'Test Dictionary 2'; @@ -33,10 +33,9 @@ describe('Translator', () => {      const {optionsPresets, tests} = parseJson(readFileSync(testInputsFilePath, {encoding: 'utf8'}));      const findKanjiTests = tests.filter((data) => data.options === 'kanji'); -    const findTermTests = tests.filter((data) => data.options === 'default'); -    const findTermWithTextTransformationsTests = tests.filter((data) => data.options !== 'kanji' && data.options 
!== 'default'); +    const findTermTests = tests.filter((data) => data.options !== 'kanji'); -    bench(`Translator.prototype.findTerms - no text transformations  (n=${findTermTests.length})`, async () => { +    bench(`Translator.prototype.findTerms - (n=${findTermTests.length})`, async () => {          for (const data of /** @type {import('test/translator').TestInputFindTerm[]} */ (findTermTests)) {              const {mode, text} = data;              const options = createFindTermsOptions(dictionaryName, optionsPresets, data.options); @@ -44,14 +43,6 @@ describe('Translator', () => {          }      }); -    bench(`Translator.prototype.findTerms - text transformations  (n=${findTermWithTextTransformationsTests.length})`, async () => { -        for (const data of /** @type {import('test/translator').TestInputFindTerm[]} */ (findTermWithTextTransformationsTests)) { -            const {mode, text} = data; -            const options = createFindTermsOptions(dictionaryName, optionsPresets, data.options); -            await translator.findTerms(mode, text, options); -        } -    }); -      bench(`Translator.prototype.findKanji - (n=${findKanjiTests.length})`, async () => {          for (const data of /** @type {import('test/translator').TestInputFindKanji[]} */ (findKanjiTests)) {              const {text} = data; diff --git a/dev/jsconfig.json b/dev/jsconfig.json index d9465108..6a5fb13b 100644 --- a/dev/jsconfig.json +++ b/dev/jsconfig.json @@ -28,6 +28,9 @@              "error": ["../types/ext/error"],              "event-listener-collection": ["../types/ext/event-listener-collection"],              "japanese-util": ["../types/ext/japanese-util"], +            "language": ["../types/ext/language"], +            "language-english": ["../types/ext/language-english"], +            "language-japanese": ["../types/ext/language-japanese"],              "ext/json-schema": ["../types/ext/json-schema"],              "language-transformer": 
["../types/ext/language-transformer"],              "language-transformer-internal": ["../types/ext/language-transformer-internal"], diff --git a/docs/anki-integration.md b/docs/anki-integration.md index 2bd9fad9..9bd5bd94 100644 --- a/docs/anki-integration.md +++ b/docs/anki-integration.md @@ -23,71 +23,71 @@ Flashcard fields can be configured with the following steps:      #### Markers for Term Cards -    | Marker                     | Description                                                                                                              | -    | -------------------------- | ------------------------------------------------------------------------------------------------------------------------ | -    | `{audio}`                  | Audio sample of a native speaker's pronunciation in MP3 format (if available).                                           | -    | `{clipboard-image}`        | An image which is stored in the system clipboard, if present.                                                            | -    | `{clipboard-text}`         | Text which is stored in the system clipboard, if present.                                                                | -    | `{cloze-body}`             | Raw, inflected term as it appeared before being reduced to dictionary form by Yomitan.                                   | -    | `{cloze-body-kana}`        | Kana reading for `{cloze-body}`.                                                                                         | -    | `{cloze-prefix}`           | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`. | -    | `{cloze-suffix}`           | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.             | -    | `{conjugation}`            | Conjugation path from the raw inflected term to the source term.                                                         
| -    | `{dictionary}`             | Name of the dictionary from which the card is being created (unavailable in _grouped_ mode).                             | -    | `{document-title}`         | Title of the web page that the term appeared in.                                                                         | -    | `{expression}`             | Term expressed as kanji (will be displayed in kana if kanji is not available).                                           | -    | `{frequencies}`            | Frequency information for the term.                                                                                      | -    | `{frequency-harmonic-rank}` | The harmonic mean of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage. | +    | Marker                            | Description                                                                                                                                                            | +    | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +    | `{audio}`                         | Audio sample of a native speaker's pronunciation in MP3 format (if available).                                                                                         | +    | `{clipboard-image}`               | An image which is stored in the system clipboard, if present.                                                                                                          | +    | `{clipboard-text}`                | Text which is stored in the system clipboard, if present.                                                                                                              
| +    | `{cloze-body}`                    | Raw, inflected term as it appeared before being reduced to dictionary form by Yomitan.                                                                                 | +    | `{cloze-body-kana}`               | Kana reading for `{cloze-body}`.                                                                                                                                       | +    | `{cloze-prefix}`                  | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`.                                               | +    | `{cloze-suffix}`                  | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.                                                           | +    | `{conjugation}`                   | Conjugation path from the raw inflected term to the source term.                                                                                                       | +    | `{dictionary}`                    | Name of the dictionary from which the card is being created (unavailable in _grouped_ mode).                                                                           | +    | `{document-title}`                | Title of the web page that the term appeared in.                                                                                                                       | +    | `{expression}`                    | Term expressed as kanji (will be displayed in kana if kanji is not available).                                                                                         | +    | `{frequencies}`                   | Frequency information for the term.                                                                                                                                    
| +    | `{frequency-harmonic-rank}`       | The harmonic mean of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage.   |      | `{frequency-harmonic-occurrence}` | The harmonic mean of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based term usage. | -    | `{frequency-average-rank}` | The average of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage. | -    | `{frequency-average-occurrence}` | The average of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based term usage. | -    | `{furigana}`               | Term expressed as kanji with furigana displayed above it (e.g. <ruby>日本語<rt>にほんご</rt></ruby>).                    | -    | `{furigana-plain}`         | Term expressed as kanji with furigana displayed next to it in brackets (e.g. 日本語[にほんご]).                          | -    | `{glossary}`               | List of definitions for the term (output format depends on whether running in _grouped_ mode).                           | -    | `{glossary-brief}`         | List of definitions for the term in a more compact format.                                                               | -    | `{glossary-no-dictionary}` | List of definitions for the term, except the dictionary tag is omitted.                                                  | -    | `{part-of-speech}`         | Part of speech information for the term.                                                                                 | -    | `{phonetic-transcriptions}`| List of phonetic transcriptions for the term.                                                                            
| -    | `{pitch-accents}`          | List of pitch accent downstep notations for the term.                                                                    | -    | `{pitch-accent-graphs}`    | List of pitch accent graphs for the term.                                                                                | -    | `{pitch-accent-positions}` | List of accent downstep positions for the term as a number.                                                              | -    | `{pitch-accent-categories}`| List of pitch accent categories for the term (e.g. heiban, kifuku, atamadaka, odaka, nakadaka).                          | -    | `{reading}`                | Kana reading for the term (empty for terms where the expression is the reading).                                         | -    | `{screenshot}`             | Screenshot of the web page taken at the time the term was added.                                                         | -    | `{search-query}`           | The full search query shown on the search page.                                                                          | -    | `{selection-text}`         | The selected text on the search page or popup.                                                                           | -    | `{sentence}`               | Sentence, quote, or phrase that the term appears in from the source content.                                             | -    | `{sentence-furigana}`      | Sentence, quote, or phrase that the term appears in from the source content, with furigana added.                        | -    | `{tags}`                   | Grammar and usage tags providing information about the term (unavailable in _grouped_ mode).                             | -    | `{url}`                    | Address of the web page in which the term appeared in.                                                                   
| +    | `{frequency-average-rank}`        | The average of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based term usage.         | +    | `{frequency-average-occurrence}`  | The average of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based term usage.       | +    | `{furigana}`                      | Term expressed as kanji with furigana displayed above it (e.g. <ruby>日本語<rt>にほんご</rt></ruby>).                                                                  | +    | `{furigana-plain}`                | Term expressed as kanji with furigana displayed next to it in brackets (e.g. 日本語[にほんご]).                                                                        | +    | `{glossary}`                      | List of definitions for the term (output format depends on whether running in _grouped_ mode).                                                                         | +    | `{glossary-brief}`                | List of definitions for the term in a more compact format.                                                                                                             | +    | `{glossary-no-dictionary}`        | List of definitions for the term, except the dictionary tag is omitted.                                                                                                | +    | `{part-of-speech}`                | Part of speech information for the term.                                                                                                                               | +    | `{phonetic-transcriptions}`       | List of phonetic transcriptions for the term.                                                                                                                          | +    | `{pitch-accents}`                 | List of pitch accent downstep notations for the term.        
                                                                                                          | +    | `{pitch-accent-graphs}`           | List of pitch accent graphs for the term.                                                                                                                              | +    | `{pitch-accent-positions}`        | List of accent downstep positions for the term as a number.                                                                                                            | +    | `{pitch-accent-categories}`       | List of pitch accent categories for the term (e.g. heiban, kifuku, atamadaka, odaka, nakadaka).                                                                        | +    | `{reading}`                       | Kana reading for the term (empty for terms where the expression is the reading).                                                                                       | +    | `{screenshot}`                    | Screenshot of the web page taken at the time the term was added.                                                                                                       | +    | `{search-query}`                  | The full search query shown on the search page.                                                                                                                        | +    | `{selection-text}`                | The selected text on the search page or popup.                                                                                                                         | +    | `{sentence}`                      | Sentence, quote, or phrase that the term appears in from the source content.                                                                                           | +    | `{sentence-furigana}`             | Sentence, quote, or phrase that the term appears in from the source content, with furigana added.                                                        
              | +    | `{tags}`                          | Grammar and usage tags providing information about the term (unavailable in _grouped_ mode).                                                                           | +    | `{url}`                           | Address of the web page in which the term appeared in.                                                                                                                 |      #### Markers for Kanji Cards -    | Marker                | Description                                                                                                              | -    | --------------------- | ------------------------------------------------------------------------------------------------------------------------ | -    | `{character}`         | Unicode glyph representing the current kanji.                                                                            | -    | `{clipboard-image}`   | An image which is stored in the system clipboard, if present.                                                            | -    | `{clipboard-text}`    | Text which is stored in the system clipboard, if present.                                                                | -    | `{cloze-body}`        | Raw, inflected parent term as it appeared before being reduced to dictionary form by Yomitan.                            | -    | `{cloze-prefix}`      | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`. | -    | `{cloze-suffix}`      | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.             | -    | `{dictionary}`        | Name of the dictionary from which the card is being created.                                                             | -    | `{document-title}`    | Title of the web page that the kanji appeared in.                                                            
            | -    | `{frequencies}`       | Frequency information for the kanji.                                                                                     | -    | `{frequency-harmonic-rank}` | The harmonic mean of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage. | +    | Marker                            | Description                                                                                                                                                              | +    | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +    | `{character}`                     | Unicode glyph representing the current kanji.                                                                                                                            | +    | `{clipboard-image}`               | An image which is stored in the system clipboard, if present.                                                                                                            | +    | `{clipboard-text}`                | Text which is stored in the system clipboard, if present.                                                                                                                | +    | `{cloze-body}`                    | Raw, inflected parent term as it appeared before being reduced to dictionary form by Yomitan.                                                                            | +    | `{cloze-prefix}`                  | Fragment of the containing `{sentence}` starting at the beginning of `{sentence}` until the beginning of `{cloze-body}`.                                                 
| +    | `{cloze-suffix}`                  | Fragment of the containing `{sentence}` starting at the end of `{cloze-body}` until the end of `{sentence}`.                                                             | +    | `{dictionary}`                    | Name of the dictionary from which the card is being created.                                                                                                             | +    | `{document-title}`                | Title of the web page that the kanji appeared in.                                                                                                                        | +    | `{frequencies}`                   | Frequency information for the kanji.                                                                                                                                     | +    | `{frequency-harmonic-rank}`       | The harmonic mean of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage.   |      | `{frequency-harmonic-occurrence}` | The harmonic mean of frequency data for the current kanji. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based kanji usage. | -    | `{frequency-average-rank}` | The average of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage. | -    | `{frequency-average-occurrence}` | The average of frequency data for the current kanji. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based kanji usage. | -    | `{glossary}`          | List of definitions for the kanji.                                                                                       | -    | `{kunyomi}`           | Kunyomi (Japanese reading) for the kanji expressed as katakana.                                                          
| -    | `{onyomi}`            | Onyomi (Chinese reading) for the kanji expressed as hiragana.                                                            | -    | `{screenshot}`        | Screenshot of the web page taken at the time the kanji was added.                                                        | -    | `{search-query}`      | The full search query shown on the search page.                                                                          | -    | `{selection-text}`    | The selected text on the search page or popup.                                                                           | -    | `{sentence}`          | Sentence, quote, or phrase that the character appears in from the source content.                                        | -    | `{sentence-furigana}` | Sentence, quote, or phrase that the character appears in from the source content, with furigana added.                   | -    | `{stroke-count}`      | Number of strokes that the kanji character has.                                                                          | -    | `{url}`               | Address of the web page in which the kanji appeared in.                                                                  | +    | `{frequency-average-rank}`        | The average of frequency data for the current kanji. Defaults to rank 9999999 when frequency data is not found, indicating extremely low rank-based kanji usage.         | +    | `{frequency-average-occurrence}`  | The average of frequency data for the current kanji. Defaults to 0 occurrences when frequency data is not found, the lowest possible occurrence-based kanji usage.       | +    | `{glossary}`                      | List of definitions for the kanji.                                                                                                                                       | +    | `{kunyomi}`                       | Kunyomi (Japanese reading) for the kanji expressed as katakana.                  
                                                                                        | +    | `{onyomi}`                        | Onyomi (Chinese reading) for the kanji expressed as hiragana.                                                                                                            | +    | `{screenshot}`                    | Screenshot of the web page taken at the time the kanji was added.                                                                                                        | +    | `{search-query}`                  | The full search query shown on the search page.                                                                                                                          | +    | `{selection-text}`                | The selected text on the search page or popup.                                                                                                                           | +    | `{sentence}`                      | Sentence, quote, or phrase that the character appears in from the source content.                                                                                        | +    | `{sentence-furigana}`             | Sentence, quote, or phrase that the character appears in from the source content, with furigana added.                                                                   | +    | `{stroke-count}`                  | Number of strokes that the kanji character has.                                                                                                                          | +    | `{url}`                           | Address of the web page in which the kanji appeared in.                                                                                                                  |  When creating your model for Yomitan, _make sure that you pick a unique field to be first_; fields that will  contain `{expression}` or `{character}` are ideal candidates for this. 
Anki does not allow duplicate flashcards to be diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index ea7caf0f..d86eedf7 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -81,6 +81,7 @@                                  "type": "object",                                  "required": [                                      "enable", +                                    "language",                                      "resultOutputMode",                                      "debugInfo",                                      "maxResults", @@ -126,6 +127,10 @@                                          "type": "boolean",                                          "default": true                                      }, +                                    "language": { +                                        "type": "string", +                                        "default": "ja" +                                    },                                      "resultOutputMode": {                                          "type": "string",                                          "enum": ["group", "merge", "split"], @@ -722,12 +727,6 @@                              "translation": {                                  "type": "object",                                  "required": [ -                                    "convertHalfWidthCharacters", -                                    "convertNumericCharacters", -                                    "convertAlphabeticCharacters", -                                    "convertHiraganaToKatakana", -                                    "convertKatakanaToHiragana", -                                    "collapseEmphaticSequences",                                      "textReplacements",                                      "searchResolution"                                  ], @@ -740,36 +739,6 @@                                          ],                   
                       "default": "letter"                                      }, -                                    "convertHalfWidthCharacters": { -                                        "type": "string", -                                        "enum": ["false", "true", "variant"], -                                        "default": "false" -                                    }, -                                    "convertNumericCharacters": { -                                        "type": "string", -                                        "enum": ["false", "true", "variant"], -                                        "default": "false" -                                    }, -                                    "convertAlphabeticCharacters": { -                                        "type": "string", -                                        "enum": ["false", "true", "variant"], -                                        "default": "false" -                                    }, -                                    "convertHiraganaToKatakana": { -                                        "type": "string", -                                        "enum": ["false", "true", "variant"], -                                        "default": "false" -                                    }, -                                    "convertKatakanaToHiragana": { -                                        "type": "string", -                                        "enum": ["false", "true", "variant"], -                                        "default": "variant" -                                    }, -                                    "collapseEmphaticSequences": { -                                        "type": "string", -                                        "enum": ["false", "true", "full"], -                                        "default": "false" -                                    },                                      "textReplacements": {                      
                    "type": "object",                                          "required": [ diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index e246f0bb..31191612 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -34,6 +34,7 @@ import {DictionaryDatabase} from '../dictionary/dictionary-database.js';  import {Environment} from '../extension/environment.js';  import {ObjectPropertyAccessor} from '../general/object-property-accessor.js';  import {distributeFuriganaInflected, isCodePointJapanese, isStringPartiallyJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js'; +import {getLanguageSummaries} from '../language/languages.js';  import {Translator} from '../language/translator.js';  import {AudioDownloader} from '../media/audio-downloader.js';  import {getFileExtensionFromAudioMediaType, getFileExtensionFromImageMediaType} from '../media/media-util.js'; @@ -183,7 +184,8 @@ export class Backend {              ['textHasJapaneseCharacters',    this._onApiTextHasJapaneseCharacters.bind(this)],              ['getTermFrequencies',           this._onApiGetTermFrequencies.bind(this)],              ['findAnkiNotes',                this._onApiFindAnkiNotes.bind(this)], -            ['openCrossFramePort',           this._onApiOpenCrossFramePort.bind(this)] +            ['openCrossFramePort',           this._onApiOpenCrossFramePort.bind(this)], +            ['getLanguageSummaries',         this._onApiGetLanguageSummaries.bind(this)]          ]);          /* eslint-enable @stylistic/no-multi-spaces */ @@ -906,6 +908,11 @@ export class Backend {          return {targetTabId, targetFrameId};      } +    /** @type {import('api').ApiHandler<'getLanguageSummaries'>} */ +    _onApiGetLanguageSummaries() { +        return getLanguageSummaries(); +    } +      // Command handlers      /** @@ -2361,15 +2368,9 @@ export class Backend {          if (typeof deinflect !== 'boolean') { 
deinflect = true; }          const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);          const { -            general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder}, +            general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder, language},              scanning: {alphanumeric},              translation: { -                convertHalfWidthCharacters, -                convertNumericCharacters, -                convertAlphabeticCharacters, -                convertHiraganaToKatakana, -                convertKatakanaToHiragana, -                collapseEmphaticSequences,                  textReplacements: textReplacementsOptions,                  searchResolution              } @@ -2394,16 +2395,11 @@ export class Backend {              sortFrequencyDictionary,              sortFrequencyDictionaryOrder,              removeNonJapaneseCharacters: !alphanumeric, -            convertHalfWidthCharacters, -            convertNumericCharacters, -            convertAlphabeticCharacters, -            convertHiraganaToKatakana, -            convertKatakanaToHiragana, -            collapseEmphaticSequences,              searchResolution,              textReplacements,              enabledDictionaryMap, -            excludeDictionaryDefinitions +            excludeDictionaryDefinitions, +            language          };      } diff --git a/ext/js/comm/api.js b/ext/js/comm/api.js index b4fdbeb5..40b8e252 100644 --- a/ext/js/comm/api.js +++ b/ext/js/comm/api.js @@ -361,6 +361,13 @@ export class API {          return this._invoke('openCrossFramePort', {targetTabId, targetFrameId});      } +    /** +     * @returns {Promise<import('api').ApiReturn<'getLanguageSummaries'>>} +     */ +    getLanguageSummaries() { +        return this._invoke('getLanguageSummaries', void 0); +    } +      // Utilities      /** diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js index 1644df2f..7952eafc 
100644 --- a/ext/js/data/options-util.js +++ b/ext/js/data/options-util.js @@ -522,7 +522,8 @@ export class OptionsUtil {              this._updateVersion22,              this._updateVersion23,              this._updateVersion24, -            this._updateVersion25 +            this._updateVersion25, +            this._updateVersion26          ];          /* eslint-enable @typescript-eslint/unbound-method */          if (typeof targetVersion === 'number' && targetVersion < result.length) { @@ -1155,6 +1156,31 @@ export class OptionsUtil {      }      /** +     * - Added general.language. +     * - Modularized text preprocessors. +     * @type {import('options-util').UpdateFunction} +     */ +    _updateVersion26(options) { +        const textPreprocessors = [ +            'convertHalfWidthCharacters', +            'convertNumericCharacters', +            'convertAlphabeticCharacters', +            'convertHiraganaToKatakana', +            'convertKatakanaToHiragana', +            'collapseEmphaticSequences' +        ]; + +        for (const {options: profileOptions} of options.profiles) { +            profileOptions.general.language = 'ja'; + +            for (const preprocessor of textPreprocessors) { +                delete profileOptions.translation[preprocessor]; +            } +        } +    } + + +    /**       * @param {string} url       * @returns {Promise<chrome.tabs.Tab>}       */ diff --git a/ext/js/language/en/language-english.js b/ext/js/language/en/language-english.js new file mode 100644 index 00000000..8268653f --- /dev/null +++ b/ext/js/language/en/language-english.js @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {capitalizeFirstLetter, decapitalize} from '../text-preprocessors.js'; + +/** @type {import('language-english').EnglishLanguageDescriptor} */ +export const descriptor = { +    name: 'English', +    iso: 'en', +    exampleText: 'read', +    textPreprocessors: { +        capitalizeFirstLetter, +        decapitalize +    } +}; diff --git a/ext/js/language/ja/language-japanese.js b/ext/js/language/ja/language-japanese.js new file mode 100644 index 00000000..ced34bcd --- /dev/null +++ b/ext/js/language/ja/language-japanese.js @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import {basicTextPreprocessorOptions} from '../text-preprocessors.js'; +import {convertAlphabeticToKana} from './japanese-wanakana.js'; +import {collapseEmphaticSequences, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericToFullWidth} from './japanese.js'; + +/** @type {import('language-japanese').JapaneseLanguageDescriptor} */ +export const descriptor = { +    name: 'Japanese', +    iso: 'ja', +    exampleText: '読め', +    textPreprocessors: { +        convertHalfWidthCharacters: { +            name: 'Convert half width characters to full width', +            description: 'ヨミチャン → ヨミチャン', +            options: basicTextPreprocessorOptions, +            /** @type {import('language').TextPreprocessorFunction<boolean>} */ +            process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str) +        }, +        convertNumericCharacters: { +            name: 'Convert numeric characters to full width', +            description: '1234 → 1234', +            options: basicTextPreprocessorOptions, +            /** @type {import('language').TextPreprocessorFunction<boolean>} */ +            process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str) +        }, +        convertAlphabeticCharacters: { +            name: 'Convert alphabetic characters to hiragana', +            description: 'yomichan → よみちゃん', +            options: basicTextPreprocessorOptions, +            /** @type {import('language').TextPreprocessorFunction<boolean>} */ +            process: (str, setting, sourceMap) => (setting ? 
convertAlphabeticToKana(str, sourceMap) : str) +        }, +        convertHiraganaToKatakana: { +            name: 'Convert hiragana to katakana', +            description: 'よみちゃん → ヨミチャン', +            options: basicTextPreprocessorOptions, +            /** @type {import('language').TextPreprocessorFunction<boolean>} */ +            process: (str, setting) => (setting ? convertHiraganaToKatakana(str) : str) +        }, +        convertKatakanaToHiragana: { +            name: 'Convert katakana to hiragana', +            description: 'ヨミチャン → よみちゃん', +            options: basicTextPreprocessorOptions, +            /** @type {import('language').TextPreprocessorFunction<boolean>} */ +            process: (str, setting) => (setting ? convertKatakanaToHiragana(str) : str) +        }, +        collapseEmphaticSequences: { +            name: 'Collapse emphatic character sequences', +            description: 'すっっごーーい → すっごーい / すごい', +            options: [[false, false], [true, false], [true, true]], +            /** @type {import('language').TextPreprocessorFunction<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */ +            process: (str, setting, sourceMap) => { +                const [collapseEmphatic, collapseEmphaticFull] = setting; +                if (collapseEmphatic) { +                    str = collapseEmphaticSequences(str, collapseEmphaticFull, sourceMap); +                } +                return str; +            } +        } +    } +}; diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js new file mode 100755 index 00000000..f51ca163 --- /dev/null +++ b/ext/js/language/languages.js @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +import {descriptor as descriptorEnglish} from './en/language-english.js'; +import {descriptor as descriptorJapanese} from './ja/language-japanese.js'; + +const languageDescriptors = [ +    descriptorEnglish, +    descriptorJapanese +]; + +/** @type {Map<string, typeof languageDescriptors[0]>} */ +const languageDescriptorMap = new Map(); +for (const languageDescriptor of languageDescriptors) { +    languageDescriptorMap.set(languageDescriptor.iso, languageDescriptor); +} + +/** + * @returns {import('language').LanguageSummary[]} + */ +export function getLanguageSummaries() { +    const results = []; +    for (const {name, iso, exampleText} of languageDescriptorMap.values()) { +        results.push({name, iso, exampleText}); +    } +    return results; +} + +/** + * @returns {import('language').LanguageAndPreprocessors[]} + * @throws {Error} + */ +export function getAllLanguageTextPreprocessors() { +    const results = []; +    for (const {iso, textPreprocessors} of languageDescriptorMap.values()) { +        /** @type {import('language').TextPreprocessorWithId<unknown>[]} */ +        const textPreprocessorsArray = []; +        for (const [id, textPreprocessor] of Object.entries(textPreprocessors)) { +            textPreprocessorsArray.push({ +                id, +                textPreprocessor: /** @type {import('language').TextPreprocessor<unknown>} */ (textPreprocessor) +            }); +        } +        results.push({iso, textPreprocessors: textPreprocessorsArray}); +    } +    return results; +} diff --git a/ext/js/language/text-preprocessors.js 
b/ext/js/language/text-preprocessors.js new file mode 100755 index 00000000..12b3d1b6 --- /dev/null +++ b/ext/js/language/text-preprocessors.js @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +/** @type {import('language').TextPreprocessorOptions<boolean>} */ +export const basicTextPreprocessorOptions = [false, true]; + +/** @type {import('language').TextPreprocessor<boolean>} */ +export const decapitalize = { +    name: 'Decapitalize text', +    description: 'CAPITALIZED TEXT → capitalized text', +    options: basicTextPreprocessorOptions, +    process: (str, setting) => (setting ? str.toLowerCase() : str) +}; + +/** @type {import('language').TextPreprocessor<boolean>} */ +export const capitalizeFirstLetter = { +    name: 'Capitalize first letter', +    description: 'lowercase text → Lowercase text', +    options: basicTextPreprocessorOptions, +    process: (str, setting) => (setting ? 
str.charAt(0).toUpperCase() + str.slice(1) : str) +}; diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index b2342e8d..4f9304b5 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -18,9 +18,9 @@  import {applyTextReplacement} from '../general/regex-util.js';  import {TextSourceMap} from '../general/text-source-map.js'; -import {convertAlphabeticToKana} from './ja/japanese-wanakana.js'; -import {collapseEmphaticSequences, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericToFullWidth, isCodePointJapanese} from './ja/japanese.js'; +import {isCodePointJapanese} from './ja/japanese.js';  import {LanguageTransformer} from './language-transformer.js'; +import {getAllLanguageTextPreprocessors} from './languages.js';  /**   * Class which finds term and kanji dictionary entries for text. @@ -41,6 +41,8 @@ export class Translator {          this._stringComparer = new Intl.Collator('en-US'); // Invariant locale          /** @type {RegExp} */          this._numberRegex = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/; +        /** @type {Map<string, {textPreprocessors: import('language').TextPreprocessorWithId<unknown>[], optionSpace: import('translation-internal').PreprocessorOptionsSpace}>} */ +        this._textPreprocessors = new Map();      }      /** @@ -49,6 +51,14 @@ export class Translator {       */      prepare(descriptor) {          this._languageTransformer.addDescriptor(descriptor); +        for (const {iso, textPreprocessors} of getAllLanguageTextPreprocessors()) { +            /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ +            const optionSpace = new Map(); +            for (const {id, textPreprocessor} of textPreprocessors) { +                optionSpace.set(id, textPreprocessor.options); +            } +            this._textPreprocessors.set(iso, {textPreprocessors, optionSpace}); +        }      }      /** @@ -415,51 
+425,45 @@ export class Translator {          }      } -    // Deinflections and text transformations +    // Deinflections and text preprocessing      /**       * @param {string} text       * @param {import('translation').FindTermsOptions} options       * @returns {import('translation-internal').DatabaseDeinflection[]} +     * @throws {Error}       */      _getAlgorithmDeinflections(text, options) { -        /** @type {import('translation-internal').TextDeinflectionOptionsArrays} */ -        const textOptionVariantArray = [ -            this._getTextReplacementsVariants(options), -            this._getTextOptionEntryVariants(options.convertHalfWidthCharacters), -            this._getTextOptionEntryVariants(options.convertNumericCharacters), -            this._getTextOptionEntryVariants(options.convertAlphabeticCharacters), -            this._getTextOptionEntryVariants(options.convertHiraganaToKatakana), -            this._getTextOptionEntryVariants(options.convertKatakanaToHiragana), -            this._getCollapseEmphaticOptions(options) -        ]; +        const {language} = options; +        const info = this._textPreprocessors.get(language); +        if (typeof info === 'undefined') { throw new Error(`Unsupported language: ${language}`); } +        const {textPreprocessors, optionSpace: textPreprocessorOptionsSpace} = info; + +        /** @type {Map<string, import('language').TextPreprocessorOptions<unknown>>} */ +        const variantSpace = new Map(); +        variantSpace.set('textReplacements', this._getTextReplacementsVariants(options)); +        for (const [key, value] of textPreprocessorOptionsSpace) { +            variantSpace.set(key, value); +        }          /** @type {import('translation-internal').DatabaseDeinflection[]} */          const deinflections = [];          const used = new Set(); -        for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of /** @type 
{Generator<import('translation-internal').TextDeinflectionOptions, void, unknown>} */ (this._getArrayVariants(textOptionVariantArray))) { + +        for (const arrayVariant of this._generateArrayVariants(variantSpace)) { +            const textReplacements = /** @type {import('translation').FindTermsTextReplacement[] | null} */ (arrayVariant.get('textReplacements')); +              let text2 = text;              const sourceMap = new TextSourceMap(text2); +              if (textReplacements !== null) {                  text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);              } -            if (halfWidth) { -                text2 = convertHalfWidthKanaToFullWidth(text2, sourceMap); -            } -            if (numeric) { -                text2 = convertNumericToFullWidth(text2); -            } -            if (alphabetic) { -                text2 = convertAlphabeticToKana(text2, sourceMap); -            } -            if (katakana) { -                text2 = convertHiraganaToKatakana(text2); -            } -            if (hiragana) { -                text2 = convertKatakanaToHiragana(text2); -            } -            if (collapseEmphatic) { -                text2 = collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap); + +            for (const preprocessor of textPreprocessors.values()) { +                const {id, textPreprocessor} = preprocessor; +                const setting = arrayVariant.get(id); +                text2 = textPreprocessor.process(text2, setting, sourceMap);              }              for ( @@ -527,36 +531,6 @@ export class Translator {      }      /** -     * @param {import('translation').FindTermsVariantMode} value -     * @returns {boolean[]} -     */ -    _getTextOptionEntryVariants(value) { -        switch (value) { -            case 'true': return [true]; -            case 'variant': return [false, true]; -            default: return [false]; -        } -    } - -    /** -     * @param 
{import('translation').FindTermsOptions} options -     * @returns {[collapseEmphatic: boolean, collapseEmphaticFull: boolean][]} -     */ -    _getCollapseEmphaticOptions(options) { -        /** @type {[collapseEmphatic: boolean, collapseEmphaticFull: boolean][]} */ -        const collapseEmphaticOptions = [[false, false]]; -        switch (options.collapseEmphaticSequences) { -            case 'true': -                collapseEmphaticOptions.push([true, false]); -                break; -            case 'full': -                collapseEmphaticOptions.push([true, false], [true, true]); -                break; -        } -        return collapseEmphaticOptions; -    } - -    /**       * @param {import('translation').FindTermsOptions} options       * @returns {(import('translation').FindTermsTextReplacement[] | null)[]}       */ @@ -1343,26 +1317,32 @@ export class Translator {      }      /** -     * @param {[...args: unknown[][]]} arrayVariants -     * @yields {[...args: unknown[]]} -     * @returns {Generator<unknown[], void, unknown>} +     * @param {Map<string, unknown[]>} arrayVariants +     * @yields {Map<string, unknown>} +     * @returns {Generator<Map<string, unknown>, void, void>}       */ -    *_getArrayVariants(arrayVariants) { -        const ii = arrayVariants.length; - -        let total = 1; -        for (let i = 0; i < ii; ++i) { -            total *= arrayVariants[i].length; +    *_generateArrayVariants(arrayVariants) { +        const variantKeys = [...arrayVariants.keys()]; +        const entryVariantLengths = []; +        for (const key of variantKeys) { +            const entryVariants = /** @type {unknown[]} */ (arrayVariants.get(key)); +            entryVariantLengths.push(entryVariants.length);          } +        const totalVariants = entryVariantLengths.reduce((acc, length) => acc * length, 1); + +        for (let variantIndex = 0; variantIndex < totalVariants; ++variantIndex) { +            /** @type {Map<string, unknown>} */ +            
const variant = new Map(); +            let remainingIndex = variantIndex; -        for (let a = 0; a < total; ++a) { -            const variant = []; -            let index = a; -            for (let i = 0; i < ii; ++i) { -                const entryVariants = arrayVariants[i]; -                variant.push(entryVariants[index % entryVariants.length]); -                index = Math.floor(index / entryVariants.length); +            for (let keyIndex = 0; keyIndex < variantKeys.length; ++keyIndex) { +                const key = variantKeys[keyIndex]; +                const entryVariants = /** @type {unknown[]} */ (arrayVariants.get(key)); +                const entryIndex = remainingIndex % entryVariants.length; +                variant.set(key, entryVariants[entryIndex]); +                remainingIndex = Math.floor(remainingIndex / entryVariants.length);              } +              yield variant;          }      } diff --git a/ext/js/pages/settings/languages-controller.js b/ext/js/pages/settings/languages-controller.js new file mode 100755 index 00000000..78f036df --- /dev/null +++ b/ext/js/pages/settings/languages-controller.js @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2023-2024  Yomitan Authors + * Copyright (C) 2021-2022  Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import {querySelectorNotNull} from '../../dom/query-selector.js'; + +export class LanguagesController { +    /** +     * @param {import('./settings-controller.js').SettingsController} settingsController +     */ +    constructor(settingsController) { +        /** @type {import('./settings-controller.js').SettingsController} */ +        this._settingsController = settingsController; +    } + +    /** */ +    async prepare() { +        const languages = await this._settingsController.application.api.getLanguageSummaries(); +        languages.sort((a, b) => a.iso.localeCompare(b.iso, 'en')); +        this._fillSelect(languages); +    } + +    /** +     * @param {import('language').LanguageSummary[]} languages +     */ +    _fillSelect(languages) { +        const selectElement = querySelectorNotNull(document, '#language-select'); +        for (const {iso, name} of languages) { +            const option = document.createElement('option'); +            option.value = iso; +            option.text = `(${iso}) ${name}`; +            selectElement.appendChild(option); +        } +    } +} diff --git a/ext/js/pages/settings/settings-main.js b/ext/js/pages/settings/settings-main.js index dc4b36c9..0b115246 100644 --- a/ext/js/pages/settings/settings-main.js +++ b/ext/js/pages/settings/settings-main.js @@ -30,6 +30,7 @@ import {DictionaryImportController} from './dictionary-import-controller.js';  import {ExtensionKeyboardShortcutController} from './extension-keyboard-shortcuts-controller.js';  import {GenericSettingController} from './generic-setting-controller.js';  import {KeyboardShortcutController} from './keyboard-shortcuts-controller.js'; +import {LanguagesController} from './languages-controller.js';  import {MecabController} from './mecab-controller.js';  import {ModalController} from './modal-controller.js';  import {NestedPopupsController} from './nested-popups-controller.js'; @@ -137,6 +138,9 @@ await Application.main(async (application) => {      const 
secondarySearchDictionaryController = new SecondarySearchDictionaryController(settingsController);      secondarySearchDictionaryController.prepare(); +    const languagesController = new LanguagesController(settingsController); +    languagesController.prepare(); +      const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController);      translationTextReplacementsController.prepare(); diff --git a/ext/settings.html b/ext/settings.html index 2af2a666..8dc70cf3 100644 --- a/ext/settings.html +++ b/ext/settings.html @@ -212,6 +212,19 @@                  <label class="toggle"><input type="checkbox" data-setting="general.enable"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>              </div>          </div></div> +        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> +            <div class="settings-item-left"> +                <div class="settings-item-label"> +                    Language +                </div> +                <div class="settings-item-description"> +                    Language of the text that is being looked up. 
+                </div> +            </div> +            <div class="settings-item-right"> +                <select id="language-select" data-setting="general.language"></select> +            </div> +        </div></div>          <div class="settings-item"><div class="settings-item-inner">              <div class="settings-item-left">                  <div class="settings-item-label">Show the <a href="/welcome.html" target="_blank" rel="noopener">welcome guide</a> on browser startup</div> @@ -1531,84 +1544,6 @@                  <button type="button" class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button>              </div>          </div></div> -        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> -            <div class="settings-item-left"> -                <div class="settings-item-label">Convert half width characters to full width</div> -                <div class="settings-item-description" lang="ja">ヨミタン → ヨミタン</div> -            </div> -            <div class="settings-item-right"> -                <select data-setting="translation.convertHalfWidthCharacters"> -                    <option value="false">Disabled</option> -                    <option value="true">Enabled</option> -                    <option value="variant">Use both variants</option> -                </select> -            </div> -        </div></div> -        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> -            <div class="settings-item-left"> -                <div class="settings-item-label">Convert numeric characters to full width</div> -                <div class="settings-item-description" lang="ja">1234 → 1234</div> -            </div> -            <div class="settings-item-right"> -                <select data-setting="translation.convertNumericCharacters"> -                    <option value="false">Disabled</option> -  
                  <option value="true">Enabled</option> -                    <option value="variant">Use both variants</option> -                </select> -            </div> -        </div></div> -        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> -            <div class="settings-item-left"> -                <div class="settings-item-label">Convert alphabetic characters to hiragana</div> -                <div class="settings-item-description" lang="ja">yomitan → よみたん</div> -            </div> -            <div class="settings-item-right"> -                <select data-setting="translation.convertAlphabeticCharacters"> -                    <option value="false">Disabled</option> -                    <option value="true">Enabled</option> -                    <option value="variant">Use both variants</option> -                </select> -            </div> -        </div></div> -        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> -            <div class="settings-item-left"> -                <div class="settings-item-label">Convert hiragana to katakana</div> -                <div class="settings-item-description" lang="ja">よみたん → ヨミタン</div> -            </div> -            <div class="settings-item-right"> -                <select data-setting="translation.convertHiraganaToKatakana"> -                    <option value="false">Disabled</option> -                    <option value="true">Enabled</option> -                    <option value="variant">Use both variants</option> -                </select> -            </div> -        </div></div> -        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> -            <div class="settings-item-left"> -                <div class="settings-item-label">Convert katakana to hiragana</div> -                <div class="settings-item-description" lang="ja">ヨミタン → よみたん</div> -            </div> -     
       <div class="settings-item-right"> -                <select data-setting="translation.convertKatakanaToHiragana"> -                    <option value="false">Disabled</option> -                    <option value="true">Enabled</option> -                    <option value="variant">Use both variants</option> -                </select> -            </div> -        </div></div> -        <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> -            <div class="settings-item-left"> -                <div class="settings-item-label">Collapse emphatic character sequences</div> -                <div class="settings-item-description" lang="ja">すっっごーーい → すっごーい / すごい</div> -            </div> -            <div class="settings-item-right"> -                <select data-setting="translation.collapseEmphaticSequences"> -                    <option value="false">Disabled</option> -                    <option value="true">Collapse into single character</option> -                    <option value="full">Remove all characters</option> -                </select> -            </div> -        </div></div>      </div>      <!-- Anki --> diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json index a2dd0923..1d84712d 100644 --- a/test/data/anki-note-builder-test-results.json +++ b/test/data/anki-note-builder-test-results.json @@ -869,6 +869,43 @@          "sentence-furigana": "cloze-prefixダースcloze-suffix",          "tags": "abbr, n",          "url": "<a href=\"url:\">url:</a>" +      }, +      { +        "audio": "", +        "clipboard-image": "", +        "clipboard-text": "", +        "cloze-body": "ダ", +        "cloze-body-kana": "ダ", +        "cloze-prefix": "cloze-prefix", +        "cloze-suffix": "cloze-suffix", +        "conjugation": "", +        "dictionary": "Test Dictionary 2", +        "document-title": "title", +        "expression": "打", +        "frequencies": "<ul 
style=\"text-align: left;\"><li>Test Dictionary 2: 1</li><li>Test Dictionary 2: four</li><li>Test Dictionary 2: five (5)</li><li>Test Dictionary 2: 8</li><li>Test Dictionary 2: fourteen</li><li>Test Dictionary 2: twenty (20)</li><li>Test Dictionary 2: 26</li></ul>", +        "frequency-harmonic-rank": "1", +        "frequency-harmonic-occurrence": "1", +        "frequency-average-rank": "1", +        "frequency-average-occurrence": "1", +        "furigana": "<ruby>打<rt>だ</rt></ruby>", +        "furigana-plain": "打[だ]", +        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>", +        "glossary-brief": "<div style=\"text-align: left;\"><ul><li>da definition 1</li><li>da definition 2</li></ul></div>", +        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> <ul><li>da definition 1</li><li>da definition 2</li></ul></div>", +        "part-of-speech": "Noun", +        "pitch-accents": "No pitch accent data", +        "pitch-accent-graphs": "No pitch accent data", +        "pitch-accent-positions": "No pitch accent data", +        "pitch-accent-categories": "", +        "phonetic-transcriptions": "", +        "reading": "だ", +        "screenshot": "", +        "search-query": "fullQuery", +        "selection-text": "", +        "sentence": "cloze-prefixダcloze-suffix", +        "sentence-furigana": "cloze-prefixダcloze-suffix", +        "tags": "n", +        "url": "<a href=\"url:\">url:</a>"        }      ]    }, @@ -3437,7 +3474,7 @@      ]    },    { -    "name": "Test text transformations - convertNumericCharacters", +    "name": "Test text preprocessors - convertNumericCharacters",      "results": [        {          "audio": "", @@ -3479,7 +3516,7 @@      ]    },    { -    "name": "Test text transformations - convertAlphabeticCharacters", +    "name": "Test text preprocessors - convertAlphabeticCharacters",      "results": [        {          "audio": 
"", @@ -3558,7 +3595,7 @@      ]    },    { -    "name": "Test text transformations - convertKatakanaToHiragana", +    "name": "Test text preprocessors - convertKatakanaToHiragana",      "results": [        {          "audio": "", @@ -3637,7 +3674,7 @@      ]    },    { -    "name": "Test text transformations - convertHiraganaToKatakana", +    "name": "Test text preprocessors - convertHiraganaToKatakana",      "results": [        {          "audio": "", @@ -3679,7 +3716,7 @@      ]    },    { -    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", +    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",      "results": [        {          "audio": "", @@ -3758,7 +3795,7 @@      ]    },    { -    "name": "Test text transformations - collapseEmphaticSequences", +    "name": "Test text preprocessors - collapseEmphaticSequences",      "results": [        {          "audio": "", @@ -3768,7 +3805,7 @@          "cloze-body-kana": "すっっごーーい",          "cloze-prefix": "cloze-prefix",          "cloze-suffix": "cloze-suffix", -        "conjugation": "", +        "conjugation": "<ul><li>adv « kansai-ben</li></ul>",          "dictionary": "Test Dictionary 2",          "document-title": "title",          "expression": "凄い", @@ -3798,5 +3835,89 @@          "url": "<a href=\"url:\">url:</a>"        }      ] +  }, +  { +    "name": "Test text preprocessors - capitalizeFirstLetter", +    "results": [ +      { +        "audio": "", +        "clipboard-image": "", +        "clipboard-text": "", +        "cloze-body": "english", +        "cloze-body-kana": "english", +        "cloze-prefix": "cloze-prefix", +        "cloze-suffix": "cloze-suffix", +        "conjugation": "", +        "dictionary": "Test Dictionary 2", +        "document-title": "title", +        "expression": "English", +        "frequencies": "", +        "frequency-harmonic-rank": "9999999", +        "frequency-harmonic-occurrence": "0", 
+        "frequency-average-rank": "9999999", +        "frequency-average-occurrence": "0", +        "furigana": "English", +        "furigana-plain": "English", +        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>", +        "glossary-brief": "<div style=\"text-align: left;\">English definition</div>", +        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>", +        "part-of-speech": "Noun", +        "pitch-accents": "No pitch accent data", +        "pitch-accent-graphs": "No pitch accent data", +        "pitch-accent-positions": "No pitch accent data", +        "pitch-accent-categories": "", +        "phonetic-transcriptions": "", +        "reading": "English", +        "screenshot": "", +        "search-query": "fullQuery", +        "selection-text": "", +        "sentence": "cloze-prefixenglishcloze-suffix", +        "sentence-furigana": "cloze-prefixenglishcloze-suffix", +        "tags": "n", +        "url": "<a href=\"url:\">url:</a>" +      } +    ] +  }, +  { +    "name": "Test text preprocessors - decapitalize", +    "results": [ +      { +        "audio": "", +        "clipboard-image": "", +        "clipboard-text": "", +        "cloze-body": "LANGUAGE", +        "cloze-body-kana": "LANGUAGE", +        "cloze-prefix": "cloze-prefix", +        "cloze-suffix": "cloze-suffix", +        "conjugation": "", +        "dictionary": "Test Dictionary 2", +        "document-title": "title", +        "expression": "language", +        "frequencies": "", +        "frequency-harmonic-rank": "9999999", +        "frequency-harmonic-occurrence": "0", +        "frequency-average-rank": "9999999", +        "frequency-average-occurrence": "0", +        "furigana": "language", +        "furigana-plain": "language", +        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> language definition</div>", +        "glossary-brief": "<div 
style=\"text-align: left;\">language definition</div>", +        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> language definition</div>", +        "part-of-speech": "Noun", +        "pitch-accents": "No pitch accent data", +        "pitch-accent-graphs": "No pitch accent data", +        "pitch-accent-positions": "No pitch accent data", +        "pitch-accent-categories": "", +        "phonetic-transcriptions": "", +        "reading": "language", +        "screenshot": "", +        "search-query": "fullQuery", +        "selection-text": "", +        "sentence": "cloze-prefixLANGUAGEcloze-suffix", +        "sentence-furigana": "cloze-prefixLANGUAGEcloze-suffix", +        "tags": "n", +        "url": "<a href=\"url:\">url:</a>" +      } +    ]    }  ] diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 6d0f74ce..5747f59e 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -27,7 +27,7 @@          "ipa": 1        },        "terms": { -        "total": 27 +        "total": 29        }      }    }, @@ -36,7 +36,7 @@        {          "kanji": 2,          "kanjiMeta": 6, -        "terms": 27, +        "terms": 29,          "termMeta": 39,          "tagMeta": 15,          "media": 6 @@ -45,7 +45,7 @@      "total": {        "kanji": 2,        "kanjiMeta": 6, -      "terms": 27, +      "terms": 29,        "termMeta": 39,        "tagMeta": 15,        "media": 6 diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json index 03a9900c..e7fb015c 100644 --- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json @@ -341,5 +341,7 @@      ["のたまう", "のたまう", "v5", "v5", 1, ["notamau definition"], 15, ""],      ["のたもうた", "のたもうた", "", "", 1, [["のたまう", ["past"]]], 16, ""],      ["39", "さんきゅう", "", "", 1, ["sankyuu definition"], 17, ""], -    
["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""] +    ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""], +    ["English", "", "n", "n", 1, ["English definition"], 19, ""], +    ["language", "", "n", "n", 1, ["language definition"], 20, ""]  ] diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json index 03ec938a..c9047716 100644 --- a/test/data/translator-test-inputs.json +++ b/test/data/translator-test-inputs.json @@ -21,12 +21,7 @@              "sortFrequencyDictionary": null,              "sortFrequencyDictionaryOrder": "descending",              "removeNonJapaneseCharacters": true, -            "convertHalfWidthCharacters": "false", -            "convertNumericCharacters": "false", -            "convertAlphabeticCharacters": "false", -            "convertHiraganaToKatakana": "false", -            "convertKatakanaToHiragana": "false", -            "collapseEmphaticSequences": "false", +            "language": "ja",              "textReplacements": [                  null              ], @@ -361,7 +356,7 @@              "options": "default"          },          { -            "name": "Test text transformations - convertNumericCharacters", +            "name": "Test text preprocessors - convertNumericCharacters",              "func": "findTerms",              "mode": "split",              "text": "39", @@ -369,13 +364,12 @@                  "default",                  {                      "type": "terms", -                    "convertNumericCharacters": "true",                      "removeNonJapaneseCharacters": false                  }              ]          },          { -            "name": "Test text transformations - convertAlphabeticCharacters", +            "name": "Test text preprocessors - convertAlphabeticCharacters",              "func": "findTerms",              "mode": "split",              "text": "utsu", @@ -383,61 +377,83 @@                  "default",                  {           
           "type": "terms", -                    "convertAlphabeticCharacters": "true",                      "removeNonJapaneseCharacters": false                  }              ]          },          { -            "name": "Test text transformations - convertKatakanaToHiragana", +            "name": "Test text preprocessors - convertKatakanaToHiragana",              "func": "findTerms",              "mode": "split",              "text": "ウツ",              "options": [                  "default",                  { -                    "type": "terms", -                    "convertKatakanaToHiragana": "true" +                    "type": "terms"                  }              ]          },          { -            "name": "Test text transformations - convertHiraganaToKatakana", +            "name": "Test text preprocessors - convertHiraganaToKatakana",              "func": "findTerms",              "mode": "split",              "text": "てきすと",              "options": [                  "default",                  { -                    "type": "terms", -                    "convertHiraganaToKatakana": "true" +                    "type": "terms"                  }              ]          },          { -            "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", +            "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",              "func": "findTerms",              "mode": "split",              "text": "ウツ",              "options": [                  "default",                  { -                    "type": "terms", -                    "convertHalfWidthCharacters": "true", -                    "convertKatakanaToHiragana": "true" +                    "type": "terms"                  }              ]          },          { -            "name": "Test text transformations - collapseEmphaticSequences", +            "name": "Test text preprocessors - collapseEmphaticSequences",      
        "func": "findTerms",              "mode": "split",              "text": "すっっごーーい",              "options": [                  "default",                  { +                    "type": "terms" +                } +            ] +        }, +        { +            "name": "Test text preprocessors - capitalizeFirstLetter", +            "func": "findTerms", +            "mode": "split", +            "text": "english", +            "options": [ +                "default", +                {                      "type": "terms", -                    "collapseEmphaticSequences": "full" +                    "language": "en", +                    "removeNonJapaneseCharacters": false +                } +            ] +        }, +        { +            "name": "Test text preprocessors - decapitalize", +            "func": "findTerms", +            "mode": "split", +            "text": "LANGUAGE", +            "options": [ +                "default", +                { +                    "type": "terms", +                    "language": "en", +                    "removeNonJapaneseCharacters": false                  }              ]          } diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json index f84f28a4..f580ac53 100644 --- a/test/data/translator-test-results-note-data1.json +++ b/test/data/translator-test-results-note-data1.json @@ -7276,6 +7276,320 @@            }          },          "media": {} +      }, +      { +        "marker": "{marker}", +        "definition": { +          "type": "term", +          "id": 1, +          "source": "だ", +          "rawSource": "ダ", +          "sourceTerm": "だ", +          "inflectionRuleChainCandidates": [ +            { +              "source": "algorithm", +              "inflectionRules": [] +            } +          ], +          "score": 1, +          "isPrimary": true, +          "sequence": 1, +          "dictionary": "Test Dictionary 2", +          
"dictionaryOrder": { +            "index": 0, +            "priority": 0 +          }, +          "dictionaryNames": [ +            "Test Dictionary 2" +          ], +          "expression": "打", +          "reading": "だ", +          "expressions": [ +            { +              "sourceTerm": "だ", +              "expression": "打", +              "reading": "だ", +              "termTags": [ +                { +                  "name": "E1", +                  "category": "default", +                  "notes": "example tag 1", +                  "order": 0, +                  "score": 0, +                  "dictionary": "Test Dictionary 2", +                  "redundant": false +                } +              ], +              "frequencies": [ +                { +                  "index": 0, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +                  "hasReading": false, +                  "frequency": 1 +                }, +                { +                  "index": 1, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +                  "hasReading": false, +                  "frequency": "four" +                }, +                { +                  "index": 2, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +      
            "hasReading": false, +                  "frequency": "five (5)" +                }, +                { +                  "index": 3, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +                  "hasReading": true, +                  "frequency": 8 +                }, +                { +                  "index": 4, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +                  "hasReading": true, +                  "frequency": "fourteen" +                }, +                { +                  "index": 5, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +                  "hasReading": true, +                  "frequency": "twenty (20)" +                }, +                { +                  "index": 6, +                  "expressionIndex": 0, +                  "dictionary": "Test Dictionary 2", +                  "dictionaryOrder": { +                    "index": 0, +                    "priority": 0 +                  }, +                  "expression": "打", +                  "reading": "だ", +                  "hasReading": true, +                  "frequency": 26 +                } +              ], +              "pitches": [], +              "furiganaSegments": [ +                { +              
    "text": "打", +                  "furigana": "だ" +                } +              ], +              "termFrequency": "normal", +              "wordClasses": [ +                "n" +              ] +            } +          ], +          "glossary": [ +            "da definition 1", +            "da definition 2" +          ], +          "definitionTags": [ +            { +              "name": "n", +              "category": "partOfSpeech", +              "notes": "noun", +              "order": 0, +              "score": 0, +              "dictionary": "Test Dictionary 2", +              "redundant": false +            } +          ], +          "termTags": [ +            { +              "name": "E1", +              "category": "default", +              "notes": "example tag 1", +              "order": 0, +              "score": 0, +              "dictionary": "Test Dictionary 2", +              "redundant": false +            } +          ], +          "frequencies": [ +            { +              "index": 0, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": false, +              "frequency": 1 +            }, +            { +              "index": 1, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": false, +              "frequency": "four" +            }, +            { +              "index": 2, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 
0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": false, +              "frequency": "five (5)" +            }, +            { +              "index": 3, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": true, +              "frequency": 8 +            }, +            { +              "index": 4, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": true, +              "frequency": "fourteen" +            }, +            { +              "index": 5, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": true, +              "frequency": "twenty (20)" +            }, +            { +              "index": 6, +              "expressionIndex": 0, +              "dictionary": "Test Dictionary 2", +              "dictionaryOrder": { +                "index": 0, +                "priority": 0 +              }, +              "expression": "打", +              "reading": "だ", +              "hasReading": true, +              "frequency": 26 +            } +          ], +          "frequencyHarmonic": 1, +          "frequencyAverage": 1, +          "pitches": [], +          "phoneticTranscriptions": [], +          "sourceTermExactMatchCount": 0, +          "url": "url:", +          "cloze": { +     
       "sentence": "", +            "prefix": "", +            "body": "", +            "bodyKana": "", +            "suffix": "" +          }, +          "furiganaSegments": [ +            { +              "text": "打", +              "furigana": "だ" +            } +          ] +        }, +        "glossaryLayoutMode": "default", +        "compactTags": false, +        "group": false, +        "merge": false, +        "modeTermKanji": false, +        "modeTermKana": false, +        "modeKanji": false, +        "compactGlossaries": false, +        "uniqueExpressions": [ +          "打" +        ], +        "uniqueReadings": [ +          "だ" +        ], +        "pitches": [], +        "pitchCount": 0, +        "phoneticTranscriptions": [], +        "context": { +          "query": "query", +          "fullQuery": "fullQuery", +          "document": { +            "title": "title" +          } +        }, +        "media": {}        }      ]    }, @@ -30117,7 +30431,7 @@      ]    },    { -    "name": "Test text transformations - convertNumericCharacters", +    "name": "Test text preprocessors - convertNumericCharacters",      "noteDataList": [        {          "marker": "{marker}", @@ -30219,7 +30533,7 @@      ]    },    { -    "name": "Test text transformations - convertAlphabeticCharacters", +    "name": "Test text preprocessors - convertAlphabeticCharacters",      "noteDataList": [        {          "marker": "{marker}", @@ -30852,7 +31166,7 @@      ]    },    { -    "name": "Test text transformations - convertKatakanaToHiragana", +    "name": "Test text preprocessors - convertKatakanaToHiragana",      "noteDataList": [        {          "marker": "{marker}", @@ -31485,7 +31799,7 @@      ]    },    { -    "name": "Test text transformations - convertHiraganaToKatakana", +    "name": "Test text preprocessors - convertHiraganaToKatakana",      "noteDataList": [        {          "marker": "{marker}", @@ -31638,7 +31952,7 @@      ]    },    { -    "name": "Test text 
transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", +    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",      "noteDataList": [        {          "marker": "{marker}", @@ -32271,7 +32585,7 @@      ]    },    { -    "name": "Test text transformations - collapseEmphaticSequences", +    "name": "Test text preprocessors - collapseEmphaticSequences",      "noteDataList": [        {          "marker": "{marker}", @@ -32285,6 +32599,13 @@              {                "source": "algorithm",                "inflectionRules": [] +            }, +            { +              "source": "algorithm", +              "inflectionRules": [ +                "adv", +                "kansai-ben" +              ]              }            ],            "score": 1, @@ -32391,5 +32712,233 @@          "media": {}        }      ] +  }, +  { +    "name": "Test text preprocessors - capitalizeFirstLetter", +    "noteDataList": [ +      { +        "marker": "{marker}", +        "definition": { +          "type": "term", +          "id": 27, +          "source": "English", +          "rawSource": "english", +          "sourceTerm": "English", +          "inflectionRuleChainCandidates": [ +            { +              "source": "algorithm", +              "inflectionRules": [] +            } +          ], +          "score": 1, +          "isPrimary": true, +          "sequence": 19, +          "dictionary": "Test Dictionary 2", +          "dictionaryOrder": { +            "index": 0, +            "priority": 0 +          }, +          "dictionaryNames": [ +            "Test Dictionary 2" +          ], +          "expression": "English", +          "reading": "English", +          "expressions": [ +            { +              "sourceTerm": "English", +              "expression": "English", +              "reading": "English", +              "termTags": [], +              "frequencies": [], +              "pitches": [], +           
   "furiganaSegments": [ +                { +                  "text": "English", +                  "furigana": "" +                } +              ], +              "termFrequency": "normal", +              "wordClasses": [ +                "n" +              ] +            } +          ], +          "glossary": [ +            "English definition" +          ], +          "definitionTags": [ +            { +              "name": "n", +              "category": "partOfSpeech", +              "notes": "noun", +              "order": 0, +              "score": 0, +              "dictionary": "Test Dictionary 2", +              "redundant": false +            } +          ], +          "termTags": [], +          "frequencies": [], +          "frequencyHarmonic": -1, +          "frequencyAverage": -1, +          "pitches": [], +          "phoneticTranscriptions": [], +          "sourceTermExactMatchCount": 1, +          "url": "url:", +          "cloze": { +            "sentence": "", +            "prefix": "", +            "body": "", +            "bodyKana": "", +            "suffix": "" +          }, +          "furiganaSegments": [ +            { +              "text": "English", +              "furigana": "" +            } +          ] +        }, +        "glossaryLayoutMode": "default", +        "compactTags": false, +        "group": false, +        "merge": false, +        "modeTermKanji": false, +        "modeTermKana": false, +        "modeKanji": false, +        "compactGlossaries": false, +        "uniqueExpressions": [ +          "English" +        ], +        "uniqueReadings": [ +          "English" +        ], +        "pitches": [], +        "pitchCount": 0, +        "phoneticTranscriptions": [], +        "context": { +          "query": "query", +          "fullQuery": "fullQuery", +          "document": { +            "title": "title" +          } +        }, +        "media": {} +      } +    ] +  }, +  { +    "name": "Test text preprocessors - 
decapitalize", +    "noteDataList": [ +      { +        "marker": "{marker}", +        "definition": { +          "type": "term", +          "id": 28, +          "source": "language", +          "rawSource": "LANGUAGE", +          "sourceTerm": "language", +          "inflectionRuleChainCandidates": [ +            { +              "source": "algorithm", +              "inflectionRules": [] +            } +          ], +          "score": 1, +          "isPrimary": true, +          "sequence": 20, +          "dictionary": "Test Dictionary 2", +          "dictionaryOrder": { +            "index": 0, +            "priority": 0 +          }, +          "dictionaryNames": [ +            "Test Dictionary 2" +          ], +          "expression": "language", +          "reading": "language", +          "expressions": [ +            { +              "sourceTerm": "language", +              "expression": "language", +              "reading": "language", +              "termTags": [], +              "frequencies": [], +              "pitches": [], +              "furiganaSegments": [ +                { +                  "text": "language", +                  "furigana": "" +                } +              ], +              "termFrequency": "normal", +              "wordClasses": [ +                "n" +              ] +            } +          ], +          "glossary": [ +            "language definition" +          ], +          "definitionTags": [ +            { +              "name": "n", +              "category": "partOfSpeech", +              "notes": "noun", +              "order": 0, +              "score": 0, +              "dictionary": "Test Dictionary 2", +              "redundant": false +            } +          ], +          "termTags": [], +          "frequencies": [], +          "frequencyHarmonic": -1, +          "frequencyAverage": -1, +          "pitches": [], +          "phoneticTranscriptions": [], +          "sourceTermExactMatchCount": 1, +          
"url": "url:", +          "cloze": { +            "sentence": "", +            "prefix": "", +            "body": "", +            "bodyKana": "", +            "suffix": "" +          }, +          "furiganaSegments": [ +            { +              "text": "language", +              "furigana": "" +            } +          ] +        }, +        "glossaryLayoutMode": "default", +        "compactTags": false, +        "group": false, +        "merge": false, +        "modeTermKanji": false, +        "modeTermKana": false, +        "modeKanji": false, +        "compactGlossaries": false, +        "uniqueExpressions": [ +          "language" +        ], +        "uniqueReadings": [ +          "language" +        ], +        "pitches": [], +        "pitchCount": 0, +        "phoneticTranscriptions": [], +        "context": { +          "query": "query", +          "fullQuery": "fullQuery", +          "document": { +            "title": "title" +          } +        }, +        "media": {} +      } +    ]    }  ] diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json index e11cb4a9..cd3f7ab6 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -3969,6 +3969,174 @@              "displayValueParsed": false            }          ] +      }, +      { +        "type": "term", +        "isPrimary": true, +        "inflectionRuleChainCandidates": [ +          { +            "source": "algorithm", +            "inflectionRules": [] +          } +        ], +        "score": 1, +        "frequencyOrder": 0, +        "dictionaryIndex": 0, +        "dictionaryPriority": 0, +        "sourceTermExactMatchCount": 0, +        "maxTransformedTextLength": 1, +        "headwords": [ +          { +            "index": 0, +            "term": "打", +            "reading": "だ", +            "sources": [ +              { +                "originalText": "ダ", +                "transformedText": "だ", +         
       "deinflectedText": "だ", +                "matchType": "exact", +                "matchSource": "reading", +                "isPrimary": true +              } +            ], +            "tags": [ +              { +                "name": "E1", +                "category": "default", +                "order": 0, +                "score": 0, +                "content": [ +                  "example tag 1" +                ], +                "dictionaries": [ +                  "Test Dictionary 2" +                ], +                "redundant": false +              } +            ], +            "wordClasses": [ +              "n" +            ] +          } +        ], +        "definitions": [ +          { +            "index": 0, +            "headwordIndices": [ +              0 +            ], +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "id": 1, +            "score": 1, +            "frequencyOrder": 0, +            "sequences": [ +              1 +            ], +            "isPrimary": true, +            "tags": [ +              { +                "name": "n", +                "category": "partOfSpeech", +                "order": 0, +                "score": 0, +                "content": [ +                  "noun" +                ], +                "dictionaries": [ +                  "Test Dictionary 2" +                ], +                "redundant": false +              } +            ], +            "entries": [ +              "da definition 1", +              "da definition 2" +            ] +          } +        ], +        "pronunciations": [], +        "frequencies": [ +          { +            "index": 0, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "hasReading": false, +            "frequency": 1, +            "displayValue": 
null, +            "displayValueParsed": false +          }, +          { +            "index": 1, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "hasReading": false, +            "frequency": 0, +            "displayValue": "four", +            "displayValueParsed": true +          }, +          { +            "index": 2, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "hasReading": false, +            "frequency": 5, +            "displayValue": "five (5)", +            "displayValueParsed": true +          }, +          { +            "index": 3, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "hasReading": true, +            "frequency": 8, +            "displayValue": null, +            "displayValueParsed": false +          }, +          { +            "index": 4, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "hasReading": true, +            "frequency": 0, +            "displayValue": "fourteen", +            "displayValueParsed": true +          }, +          { +            "index": 5, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "hasReading": true, +            "frequency": 20, +            "displayValue": "twenty (20)", +            "displayValueParsed": true +          }, +          { +            "index": 6, +            "headwordIndex": 0, +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +    
        "hasReading": true, +            "frequency": 26, +            "displayValue": null, +            "displayValueParsed": false +          } +        ]        }      ]    }, @@ -16978,7 +17146,7 @@      ]    },    { -    "name": "Test text transformations - convertNumericCharacters", +    "name": "Test text preprocessors - convertNumericCharacters",      "originalTextLength": 2,      "dictionaryEntries": [        { @@ -17043,7 +17211,7 @@      ]    },    { -    "name": "Test text transformations - convertAlphabeticCharacters", +    "name": "Test text preprocessors - convertAlphabeticCharacters",      "originalTextLength": 4,      "dictionaryEntries": [        { @@ -17389,7 +17557,7 @@      ]    },    { -    "name": "Test text transformations - convertKatakanaToHiragana", +    "name": "Test text preprocessors - convertKatakanaToHiragana",      "originalTextLength": 2,      "dictionaryEntries": [        { @@ -17735,7 +17903,7 @@      ]    },    { -    "name": "Test text transformations - convertHiraganaToKatakana", +    "name": "Test text preprocessors - convertHiraganaToKatakana",      "originalTextLength": 4,      "dictionaryEntries": [        { @@ -17844,7 +18012,7 @@      ]    },    { -    "name": "Test text transformations - convertHalfWidthCharacters, convertKatakanaToHiragana", +    "name": "Test text preprocessors - convertHalfWidthCharacters, convertKatakanaToHiragana",      "originalTextLength": 2,      "dictionaryEntries": [        { @@ -18190,7 +18358,7 @@      ]    },    { -    "name": "Test text transformations - collapseEmphaticSequences", +    "name": "Test text preprocessors - collapseEmphaticSequences",      "originalTextLength": 7,      "dictionaryEntries": [        { @@ -18200,6 +18368,13 @@            {              "source": "algorithm",              "inflectionRules": [] +          }, +          { +            "source": "algorithm", +            "inflectionRules": [ +              "adv", +              "kansai-ben" +            ]          
  }          ],          "score": 1, @@ -18267,5 +18442,167 @@          "frequencies": []        }      ] +  }, +  { +    "name": "Test text preprocessors - capitalizeFirstLetter", +    "originalTextLength": 7, +    "dictionaryEntries": [ +      { +        "type": "term", +        "isPrimary": true, +        "inflectionRuleChainCandidates": [ +          { +            "source": "algorithm", +            "inflectionRules": [] +          } +        ], +        "score": 1, +        "frequencyOrder": 0, +        "dictionaryIndex": 0, +        "dictionaryPriority": 0, +        "sourceTermExactMatchCount": 1, +        "maxTransformedTextLength": 7, +        "headwords": [ +          { +            "index": 0, +            "term": "English", +            "reading": "English", +            "sources": [ +              { +                "originalText": "english", +                "transformedText": "English", +                "deinflectedText": "English", +                "matchType": "exact", +                "matchSource": "term", +                "isPrimary": true +              } +            ], +            "tags": [], +            "wordClasses": [ +              "n" +            ] +          } +        ], +        "definitions": [ +          { +            "index": 0, +            "headwordIndices": [ +              0 +            ], +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "id": 27, +            "score": 1, +            "frequencyOrder": 0, +            "sequences": [ +              19 +            ], +            "isPrimary": true, +            "tags": [ +              { +                "name": "n", +                "category": "partOfSpeech", +                "order": 0, +                "score": 0, +                "content": [ +                  "noun" +                ], +                "dictionaries": [ +                  "Test Dictionary 2" +                ], +      
          "redundant": false +              } +            ], +            "entries": [ +              "English definition" +            ] +          } +        ], +        "pronunciations": [], +        "frequencies": [] +      } +    ] +  }, +  { +    "name": "Test text preprocessors - decapitalize", +    "originalTextLength": 8, +    "dictionaryEntries": [ +      { +        "type": "term", +        "isPrimary": true, +        "inflectionRuleChainCandidates": [ +          { +            "source": "algorithm", +            "inflectionRules": [] +          } +        ], +        "score": 1, +        "frequencyOrder": 0, +        "dictionaryIndex": 0, +        "dictionaryPriority": 0, +        "sourceTermExactMatchCount": 1, +        "maxTransformedTextLength": 8, +        "headwords": [ +          { +            "index": 0, +            "term": "language", +            "reading": "language", +            "sources": [ +              { +                "originalText": "LANGUAGE", +                "transformedText": "language", +                "deinflectedText": "language", +                "matchType": "exact", +                "matchSource": "term", +                "isPrimary": true +              } +            ], +            "tags": [], +            "wordClasses": [ +              "n" +            ] +          } +        ], +        "definitions": [ +          { +            "index": 0, +            "headwordIndices": [ +              0 +            ], +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "id": 28, +            "score": 1, +            "frequencyOrder": 0, +            "sequences": [ +              20 +            ], +            "isPrimary": true, +            "tags": [ +              { +                "name": "n", +                "category": "partOfSpeech", +                "order": 0, +                "score": 0, +                "content": [ +                  
"noun" +                ], +                "dictionaries": [ +                  "Test Dictionary 2" +                ], +                "redundant": false +              } +            ], +            "entries": [ +              "language definition" +            ] +          } +        ], +        "pronunciations": [], +        "frequencies": [] +      } +    ]    }  ] diff --git a/test/options-util.test.js b/test/options-util.test.js index 3a1b1efb..8c2bd775 100644 --- a/test/options-util.test.js +++ b/test/options-util.test.js @@ -241,6 +241,7 @@ function createProfileOptionsUpdatedTestData1() {      return {          general: {              enable: true, +            language: 'ja',              resultOutputMode: 'group',              debugInfo: false,              maxResults: 32, @@ -405,12 +406,6 @@ function createProfileOptionsUpdatedTestData1() {              ]          },          translation: { -            convertHalfWidthCharacters: 'false', -            convertNumericCharacters: 'false', -            convertAlphabeticCharacters: 'false', -            convertHiraganaToKatakana: 'false', -            convertKatakanaToHiragana: 'variant', -            collapseEmphaticSequences: 'false',              searchResolution: 'letter',              textReplacements: {                  searchOriginal: true, @@ -604,7 +599,7 @@ function createOptionsUpdatedTestData1() {              }          ],          profileCurrent: 0, -        version: 25, +        version: 26,          global: {              database: {                  prefixWildcardsSupported: false diff --git a/test/utilities/translator.js b/test/utilities/translator.js index f452e688..e9c08443 100644 --- a/test/utilities/translator.js +++ b/test/utilities/translator.js @@ -124,14 +124,9 @@ export function createFindTermsOptions(dictionaryName, optionsPresets, optionsAr          sortFrequencyDictionary,          sortFrequencyDictionaryOrder,          removeNonJapaneseCharacters, -        
convertHalfWidthCharacters, -        convertNumericCharacters, -        convertAlphabeticCharacters, -        convertHiraganaToKatakana, -        convertKatakanaToHiragana, -        collapseEmphaticSequences,          excludeDictionaryDefinitions, -        searchResolution +        searchResolution, +        language      } = preset;      return { @@ -141,15 +136,10 @@ export function createFindTermsOptions(dictionaryName, optionsPresets, optionsAr          sortFrequencyDictionary: typeof sortFrequencyDictionary !== 'undefined' ? sortFrequencyDictionary : null,          sortFrequencyDictionaryOrder: typeof sortFrequencyDictionaryOrder !== 'undefined' ? sortFrequencyDictionaryOrder : 'ascending',          removeNonJapaneseCharacters: typeof removeNonJapaneseCharacters !== 'undefined' ? removeNonJapaneseCharacters : false, -        convertHalfWidthCharacters: typeof convertHalfWidthCharacters !== 'undefined' ? convertHalfWidthCharacters : 'false', -        convertNumericCharacters: typeof convertNumericCharacters !== 'undefined' ? convertNumericCharacters : 'false', -        convertAlphabeticCharacters: typeof convertAlphabeticCharacters !== 'undefined' ? convertAlphabeticCharacters : 'false', -        convertHiraganaToKatakana: typeof convertHiraganaToKatakana !== 'undefined' ? convertHiraganaToKatakana : 'false', -        convertKatakanaToHiragana: typeof convertKatakanaToHiragana !== 'undefined' ? convertKatakanaToHiragana : 'false', -        collapseEmphaticSequences: typeof collapseEmphaticSequences !== 'undefined' ? collapseEmphaticSequences : 'false',          textReplacements,          enabledDictionaryMap,          excludeDictionaryDefinitions: Array.isArray(excludeDictionaryDefinitions) ? new Set(excludeDictionaryDefinitions) : null, -        searchResolution: typeof searchResolution !== 'undefined' ? searchResolution : 'letter' +        searchResolution: typeof searchResolution !== 'undefined' ? 
searchResolution : 'letter', +        language: typeof language !== 'undefined' ? language : 'ja'      };  } diff --git a/types/ext/api.d.ts b/types/ext/api.d.ts index 1f4fc0a9..85f4c146 100644 --- a/types/ext/api.d.ts +++ b/types/ext/api.d.ts @@ -26,6 +26,7 @@ import type * as DictionaryDatabase from './dictionary-database';  import type * as DictionaryImporter from './dictionary-importer';  import type * as Environment from './environment';  import type * as Extension from './extension'; +import type * as Language from './language';  import type * as Log from './log';  import type * as Settings from './settings';  import type * as SettingsModifications from './settings-modifications'; @@ -380,6 +381,10 @@ type ApiSurface = {          params: void;          return: boolean;      }; +    getLanguageSummaries: { +        params: void; +        return: Language.LanguageSummary[]; +    };  };  type ApiExtraArgs = [sender: chrome.runtime.MessageSender]; diff --git a/types/ext/language-english.d.ts b/types/ext/language-english.d.ts new file mode 100644 index 00000000..ed501d57 --- /dev/null +++ b/types/ext/language-english.d.ts @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import type {LanguageDescriptor, TextPreprocessor} from './language'; + +export type EnglishTextPreprocessorDescriptor = { +    capitalizeFirstLetter: TextPreprocessor<boolean>; +    decapitalize: TextPreprocessor<boolean>; +}; + +export type EnglishLanguageDescriptor = LanguageDescriptor<EnglishTextPreprocessorDescriptor>; diff --git a/types/ext/language-japanese.d.ts b/types/ext/language-japanese.d.ts new file mode 100644 index 00000000..1a627ed1 --- /dev/null +++ b/types/ext/language-japanese.d.ts @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import type {LanguageDescriptor, TextPreprocessor} from './language'; + +export type JapaneseTextPreprocessorDescriptor = { +    convertHalfWidthCharacters: TextPreprocessor<boolean>; +    convertNumericCharacters: TextPreprocessor<boolean>; +    convertAlphabeticCharacters: TextPreprocessor<boolean>; +    convertHiraganaToKatakana: TextPreprocessor<boolean>; +    convertKatakanaToHiragana: TextPreprocessor<boolean>; +    collapseEmphaticSequences: TextPreprocessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>; +}; + +export type JapaneseLanguageDescriptor = LanguageDescriptor<JapaneseTextPreprocessorDescriptor>; diff --git a/types/ext/language.d.ts b/types/ext/language.d.ts new file mode 100644 index 00000000..247c7795 --- /dev/null +++ b/types/ext/language.d.ts @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2024  Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. 
+ */ + +import type {TextSourceMap} from '../../ext/js/general/text-source-map.js'; +import type {SafeAny} from './core'; + +export type TextPreprocessorOptions<T = unknown> = T[]; + +export type TextPreprocessorFunction<T = unknown> = (str: string, setting: T, sourceMap: TextSourceMap) => string; + +export type TextPreprocessor<T = unknown> = { +    name: string; +    description: string; +    options: TextPreprocessorOptions<T>; +    process: TextPreprocessorFunction<T>; +}; + +export type LanguageAndPreprocessors = { +    iso: string; +    textPreprocessors: TextPreprocessorWithId<unknown>[]; +}; + +export type TextPreprocessorWithId<T = unknown> = { +    id: string; +    textPreprocessor: TextPreprocessor<T>; +}; + +export type LanguageSummary = { +    name: string; +    iso: string; +    exampleText: string; +}; + +export type LanguageDescriptor<TTextPreprocessorDescriptor extends TextPreprocessorDescriptor> = { +    name: string; +    iso: string; +    exampleText: string; +    textPreprocessors: TTextPreprocessorDescriptor; +}; + +export type TextPreprocessorDescriptor = { +    [key: string]: TextPreprocessor<SafeAny>; +}; diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts index a900dbe6..45466c3d 100644 --- a/types/ext/settings.d.ts +++ b/types/ext/settings.d.ts @@ -101,6 +101,7 @@ export type ProfileOptions = {  export type GeneralOptions = {      enable: boolean; +    language: string;      resultOutputMode: ResultOutputMode;      debugInfo: boolean;      maxResults: number; diff --git a/types/ext/translation-internal.d.ts b/types/ext/translation-internal.d.ts index 82704c54..7006221e 100644 --- a/types/ext/translation-internal.d.ts +++ b/types/ext/translation-internal.d.ts @@ -18,6 +18,7 @@  import type * as DictionaryDatabase from './dictionary-database';  import type * as Dictionary from './dictionary';  import type * as Translation from './translation'; +import type * as Language from './language';  export type TextDeinflectionOptions = [  
    textReplacements: Translation.FindTermsTextReplacement[] | null, @@ -47,3 +48,5 @@ export type DatabaseDeinflection = {      inflectionRuleChainCandidates: Dictionary.InflectionRuleChainCandidate[];      databaseEntries: DictionaryDatabase.TermEntry[];  }; + +export type PreprocessorOptionsSpace = Map<string, Language.TextPreprocessorOptions<unknown>>; diff --git a/types/ext/translation.d.ts b/types/ext/translation.d.ts index c9a61be0..2e4d1a66 100644 --- a/types/ext/translation.d.ts +++ b/types/ext/translation.d.ts @@ -81,30 +81,6 @@ export type FindTermsOptions = {       */      removeNonJapaneseCharacters: boolean;      /** -     * Whether or not half-width characters should be converted to full-width characters. -     */ -    convertHalfWidthCharacters: FindTermsVariantMode; -    /** -     * Whether or not ASCII numeric characters should be converted to full-width numeric characters. -     */ -    convertNumericCharacters: FindTermsVariantMode; -    /** -     * Whether or not alphabetic characters should be converted to kana. -     */ -    convertAlphabeticCharacters: FindTermsVariantMode; -    /** -     * Whether or not hiragana characters should be converted to katakana. -     */ -    convertHiraganaToKatakana: FindTermsVariantMode; -    /** -     * Whether or not katakana characters should be converted to hiragana. -     */ -    convertKatakanaToHiragana: FindTermsVariantMode; -    /** -     * How emphatic character sequences should be collapsed. -     */ -    collapseEmphaticSequences: FindTermsEmphaticSequencesMode; -    /**       * An iterable sequence of text replacements to be applied during the term lookup process.       */      textReplacements: FindTermsTextReplacements; @@ -121,6 +97,10 @@ export type FindTermsOptions = {       * Whether every substring should be searched for, or only whole words.       */      searchResolution: SearchResolution; +    /** +     * ISO-639 code of the language. 
+     */ +    language: string;  };  /** @@ -134,16 +114,6 @@ export type FindTermsMatchType = Dictionary.TermSourceMatchType;  export type FindTermsSortOrder = 'ascending' | 'descending';  /** - * Mode describing how to handle variations. - */ -export type FindTermsVariantMode = 'false' | 'true' | 'variant'; - -/** - * Mode describing how to handle emphatic sequence variations. - */ -export type FindTermsEmphaticSequencesMode = 'false' | 'true' | 'full'; - -/**   * Information about how text should be replaced when looking up terms.   */  export type FindTermsTextReplacement = { diff --git a/types/test/translator.d.ts b/types/test/translator.d.ts index e3199225..efd5cc3f 100644 --- a/types/test/translator.d.ts +++ b/types/test/translator.d.ts @@ -15,8 +15,8 @@   * along with this program.  If not, see <https://www.gnu.org/licenses/>.   */ -import type {FindTermsMatchType, FindTermsSortOrder, FindTermsVariantMode, FindTermsEmphaticSequencesMode, FindKanjiDictionary, FindTermDictionary} from '../ext/translation'; -import type {SearchResolution} from 'settings'; +import type {FindTermsMatchType, FindTermsSortOrder, FindKanjiDictionary, FindTermDictionary} from '../ext/translation'; +import type {SearchResolution} from '../ext/settings';  import type {FindTermsMode} from 'translator';  import type {DictionaryEntry} from 'dictionary';  import type {NoteData} from 'anki-templates'; @@ -44,16 +44,11 @@ export type FindTermsOptionsPreset = {      sortFrequencyDictionary?: string | null;      sortFrequencyDictionaryOrder?: FindTermsSortOrder;      removeNonJapaneseCharacters?: boolean; -    convertHalfWidthCharacters?: FindTermsVariantMode; -    convertNumericCharacters?: FindTermsVariantMode; -    convertAlphabeticCharacters?: FindTermsVariantMode; -    convertHiraganaToKatakana?: FindTermsVariantMode; -    convertKatakanaToHiragana?: FindTermsVariantMode; -    collapseEmphaticSequences?: FindTermsEmphaticSequencesMode;      textReplacements?: (FindTermsTextReplacement[] 
| null)[];      enabledDictionaryMap?: [key: string, value: FindTermDictionary][];      excludeDictionaryDefinitions?: string[] | null;      searchResolution?: SearchResolution; +    language?: string;  };  export type OptionsType = OptionsPreset['type']; |