diff options
| author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-22 22:45:39 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-22 20:45:39 +0000 | 
| commit | d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch) | |
| tree | b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f | |
| parent | 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff) | |
[ja] add preprocessor for width of alphabetic characters (#964)
* add Japanese text preprocessor for variants in width of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests
---------
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
| -rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 31 | ||||
| -rw-r--r-- | ext/js/language/ja/japanese.js | 32 | ||||
| -rw-r--r-- | ext/js/language/language-descriptors.js | 12 | ||||
| -rw-r--r-- | test/data/anki-note-builder-test-results.json | 94 | ||||
| -rw-r--r-- | test/data/database-test-cases.json | 6 | ||||
| -rw-r--r-- | test/data/dictionaries/valid-dictionary1/term_bank_1.json | 3 | ||||
| -rw-r--r-- | test/data/translator-test-inputs.json | 28 | ||||
| -rw-r--r-- | test/data/translator-test-results-note-data1.json | 234 | ||||
| -rw-r--r-- | test/data/translator-test-results.json | 168 | ||||
| -rw-r--r-- | test/japanese-util.test.js | 6 | ||||
| -rw-r--r-- | types/ext/language-descriptors.d.ts | 4 | ||||
| -rw-r--r-- | types/ext/settings.d.ts | 2 | 
12 files changed, 585 insertions, 35 deletions
| diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js index b3d50817..32e45c83 100644 --- a/ext/js/language/ja/japanese-text-preprocessors.js +++ b/ext/js/language/ja/japanese-text-preprocessors.js @@ -19,10 +19,11 @@ import {basicTextProcessorOptions} from '../text-processors.js';  import {convertAlphabeticToKana} from './japanese-wanakana.js';  import {      collapseEmphaticSequences as collapseEmphaticSequencesFunction, +    convertAlphanumericToFullWidth, +    convertFullWidthAlphanumericToNormal,      convertHalfWidthKanaToFullWidth,      convertHiraganaToKatakana as convertHiraganaToKatakanaFunction, -    convertKatakanaToHiragana as convertKatakanaToHiraganaFunction, -    convertNumericToFullWidth +    convertKatakanaToHiragana as convertKatakanaToHiraganaFunction  } from './japanese.js';  /** @type {import('language').TextProcessor<boolean>} */ @@ -33,16 +34,9 @@ export const convertHalfWidthCharacters = {      process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str)  }; -/** @type {import('language').TextProcessor<boolean>} */ -export const convertNumericCharacters = { -    name: 'Convert numeric characters to full width', -    description: '1234 → 1234', -    options: basicTextProcessorOptions, -    process: (str, setting) => (setting ? 
convertNumericToFullWidth(str) : str) -};  /** @type {import('language').TextProcessor<boolean>} */ -export const convertAlphabeticCharacters = { +export const alphabeticToHiragana = {      name: 'Convert alphabetic characters to hiragana',      description: 'yomichan → よみちゃん',      options: basicTextProcessorOptions, @@ -50,6 +44,23 @@ export const convertAlphabeticCharacters = {  };  /** @type {import('language').BidirectionalConversionPreprocessor} */ +export const alphanumericWidthVariants = { +    name: 'Convert between alphabetic width variants', +    description: 'yomitan → yomitan and vice versa', +    options: ['off', 'direct', 'inverse'], +    process: (str, setting) => { +        switch (setting) { +            case 'off': +                return str; +            case 'direct': +                return convertFullWidthAlphanumericToNormal(str); +            case 'inverse': +                return convertAlphanumericToFullWidth(str); +        } +    } +}; + +/** @type {import('language').BidirectionalConversionPreprocessor} */  export const convertHiraganaToKatakana = {      name: 'Convert hiragana to katakana',      description: 'よみちゃん → ヨミチャン and vice versa', diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js index 3507e5df..2200e077 100644 --- a/ext/js/language/ja/japanese.js +++ b/ext/js/language/ja/japanese.js @@ -15,6 +15,7 @@   * along with this program.  If not, see <https://www.gnu.org/licenses/>.   
*/ +  const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;  const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;  const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; @@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) {   * @param {string} text   * @returns {string}   */ -export function convertNumericToFullWidth(text) { +export function convertAlphanumericToFullWidth(text) {      let result = '';      for (const char of text) {          let c = /** @type {number} */ (char.codePointAt(0));          if (c >= 0x30 && c <= 0x39) { // ['0', '9']              c += 0xff10 - 0x30; // 0xff10 = '0' full width -            result += String.fromCodePoint(c); -        } else { -            result += char; +        } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] +            c += 0xff21 - 0x41; // 0xff21 = 'A' full width +        } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] +            c += 0xff41 - 0x61; // 0xff41 = 'a' full width +        } +        result += String.fromCodePoint(c); +    } +    return result; +} + +/** + * @param {string} text + * @returns {string} + */ +export function convertFullWidthAlphanumericToNormal(text) { +    let result = ''; +    const length = text.length; +    for (let i = 0; i < length; i++) { +        let c = /** @type {number} */ (text[i].codePointAt(0)); +        if (c >= 0xff10 && c <= 0xff19) { // ['0', '9'] +            c -= 0xff10 - 0x30; // 0x30 = '0' +        } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] +            c -= 0xff21 - 0x41; // 0x41 = 'A' +        } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] +            c -= 0xff41 - 0x61; // 0x61 = 'a'          } +        result += String.fromCodePoint(c);      }      return result;  } diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 726842f1..baf53f81 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from 
'./ar/arabic-text-preprocessors.js';  import {eszettPreprocessor} from './de/german-text-preprocessors.js';  import {germanTransforms} from './de/german-transforms.js';  import {englishTransforms} from './en/english-transforms.js'; -import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import { +    alphabeticToHiragana, +    alphanumericWidthVariants, +    collapseEmphaticSequences, +    convertHalfWidthCharacters, +    convertHiraganaToKatakana +} from './ja/japanese-text-preprocessors.js';  import {japaneseTransforms} from './ja/japanese-transforms.js';  import {isStringPartiallyJapanese} from './ja/japanese.js';  import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js'; @@ -143,8 +149,8 @@ const languageDescriptors = [          isTextLookupWorthy: isStringPartiallyJapanese,          textPreprocessors: {              convertHalfWidthCharacters, -            convertNumericCharacters, -            convertAlphabeticCharacters, +            alphabeticToHiragana, +            alphanumericWidthVariants,              convertHiraganaToKatakana,              collapseEmphaticSequences          }, diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json index 1ec8d4cd..b4badf10 100644 --- a/test/data/anki-note-builder-test-results.json +++ b/test/data/anki-note-builder-test-results.json @@ -3874,7 +3874,99 @@      ]    },    { -    "name": "Test text preprocessors - convertAlphabeticCharacters", +    "name": "Test text preprocessors - alphabeticWidthVariants 1", +    "results": [ +      { +        "audio": "", +        "clipboard-image": "", +        "clipboard-text": "", +        "cloze-body": "English", +        "cloze-body-kana": "English", +        "cloze-prefix": "cloze-prefix", +        "cloze-suffix": "cloze-suffix", +        "conjugation": "", +        
"dictionary": "Test Dictionary 2", +        "document-title": "title", +        "expression": "English", +        "frequencies": "", +        "frequency-harmonic-rank": "9999999", +        "frequency-harmonic-occurrence": "0", +        "frequency-average-rank": "9999999", +        "frequency-average-occurrence": "0", +        "furigana": "English", +        "furigana-plain": "English", +        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>", +        "glossary-brief": "<div style=\"text-align: left;\">English definition</div>", +        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>", +        "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>", +        "glossary-first-brief": "<div style=\"text-align: left;\">English definition</div>", +        "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>", +        "part-of-speech": "Noun", +        "pitch-accents": "", +        "pitch-accent-graphs": "", +        "pitch-accent-graphs-jj": "", +        "pitch-accent-positions": "", +        "pitch-accent-categories": "", +        "phonetic-transcriptions": "", +        "reading": "English", +        "screenshot": "", +        "search-query": "fullQuery", +        "selection-text": "", +        "sentence": "cloze-prefixEnglishcloze-suffix", +        "sentence-furigana": "cloze-prefixEnglishcloze-suffix", +        "tags": "n", +        "url": "<a href=\"url:\">url:</a>" +      } +    ] +  }, +  { +    "name": "Test text preprocessors - alphabeticWidthVariants 2", +    "results": [ +      { +        "audio": "", +        "clipboard-image": "", +        "clipboard-text": "", +        "cloze-body": "USB", +        "cloze-body-kana": "USB", +        "cloze-prefix": "cloze-prefix", +        "cloze-suffix": "cloze-suffix", +        "conjugation": "", +        "dictionary": 
"Test Dictionary 2", +        "document-title": "title", +        "expression": "USB", +        "frequencies": "", +        "frequency-harmonic-rank": "9999999", +        "frequency-harmonic-occurrence": "0", +        "frequency-average-rank": "9999999", +        "frequency-average-occurrence": "0", +        "furigana": "<ruby>USB<rt>ユーエスビー</rt></ruby>", +        "furigana-plain": "USB[ユーエスビー]", +        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>", +        "glossary-brief": "<div style=\"text-align: left;\">USB definition</div>", +        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>", +        "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>", +        "glossary-first-brief": "<div style=\"text-align: left;\">USB definition</div>", +        "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>", +        "part-of-speech": "Noun", +        "pitch-accents": "", +        "pitch-accent-graphs": "", +        "pitch-accent-graphs-jj": "", +        "pitch-accent-positions": "", +        "pitch-accent-categories": "", +        "phonetic-transcriptions": "", +        "reading": "ユーエスビー", +        "screenshot": "", +        "search-query": "fullQuery", +        "selection-text": "", +        "sentence": "cloze-prefixUSBcloze-suffix", +        "sentence-furigana": "cloze-prefixUSBcloze-suffix", +        "tags": "n", +        "url": "<a href=\"url:\">url:</a>" +      } +    ] +  }, +  { +    "name": "Test text preprocessors - alphabeticToHiragana",      "results": [        {          "audio": "", diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 054ede34..04b98aa9 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -27,7 +27,7 @@          "ipa": 1        },        "terms": { -        "total": 30 +     
   "total": 31        }      }    }, @@ -36,7 +36,7 @@        {          "kanji": 2,          "kanjiMeta": 6, -        "terms": 30, +        "terms": 31,          "termMeta": 39,          "tagMeta": 15,          "media": 6 @@ -45,7 +45,7 @@      "total": {        "kanji": 2,        "kanjiMeta": 6, -      "terms": 30, +      "terms": 31,        "termMeta": 39,        "tagMeta": 15,        "media": 6 diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json index a362de23..0a635b84 100644 --- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json @@ -344,5 +344,6 @@      ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],      ["English", "", "n", "n", 1, ["English definition"], 19, ""],      ["language", "", "n", "n", 1, ["language definition"], 20, ""], -    ["마시다", "", "v", "v", 1, ["masida definition"], 21, ""] +    ["USB", "ユーエスビー", "n", "n", 1, ["USB definition"], 21, ""], +    ["마시다", "", "v", "v", 1, ["masida definition"], 22, ""]  ] diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json index 9f508a1d..ed028273 100644 --- a/test/data/translator-test-inputs.json +++ b/test/data/translator-test-inputs.json @@ -369,7 +369,33 @@              ]          },          { -            "name": "Test text preprocessors - convertAlphabeticCharacters", +            "name": "Test text preprocessors - alphabeticWidthVariants 1", +            "func": "findTerms", +            "mode": "split", +            "text": "English", +            "options": [ +                "default", +                { +                    "type": "terms", +                    "removeNonJapaneseCharacters": false +                } +            ] +        }, +        { +            "name": "Test text preprocessors - alphabeticWidthVariants 2", +            "func": "findTerms", +            "mode": "split", +   
         "text": "USB", +            "options": [ +                "default", +                { +                    "type": "terms", +                    "removeNonJapaneseCharacters": false +                } +            ] +        }, +        { +            "name": "Test text preprocessors - alphabeticToHiragana",              "func": "findTerms",              "mode": "split",              "text": "utsu", diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json index 0782c8f5..11c9d078 100644 --- a/test/data/translator-test-results-note-data1.json +++ b/test/data/translator-test-results-note-data1.json @@ -30533,7 +30533,235 @@      ]    },    { -    "name": "Test text preprocessors - convertAlphabeticCharacters", +    "name": "Test text preprocessors - alphabeticWidthVariants 1", +    "noteDataList": [ +      { +        "marker": "{marker}", +        "definition": { +          "type": "term", +          "id": 27, +          "source": "English", +          "rawSource": "English", +          "sourceTerm": "English", +          "inflectionRuleChainCandidates": [ +            { +              "source": "algorithm", +              "inflectionRules": [] +            } +          ], +          "score": 1, +          "isPrimary": true, +          "sequence": 19, +          "dictionary": "Test Dictionary 2", +          "dictionaryOrder": { +            "index": 0, +            "priority": 0 +          }, +          "dictionaryNames": [ +            "Test Dictionary 2" +          ], +          "expression": "English", +          "reading": "English", +          "expressions": [ +            { +              "sourceTerm": "English", +              "expression": "English", +              "reading": "English", +              "termTags": [], +              "frequencies": [], +              "pitches": [], +              "furiganaSegments": [ +                { +                  "text": "English", +                  
"furigana": "" +                } +              ], +              "termFrequency": "normal", +              "wordClasses": [ +                "n" +              ] +            } +          ], +          "glossary": [ +            "English definition" +          ], +          "definitionTags": [ +            { +              "name": "n", +              "category": "partOfSpeech", +              "notes": "noun", +              "order": 0, +              "score": 0, +              "dictionary": "Test Dictionary 2", +              "redundant": false +            } +          ], +          "termTags": [], +          "frequencies": [], +          "frequencyHarmonic": -1, +          "frequencyAverage": -1, +          "pitches": [], +          "phoneticTranscriptions": [], +          "sourceTermExactMatchCount": 1, +          "url": "url:", +          "cloze": { +            "sentence": "", +            "prefix": "", +            "body": "", +            "bodyKana": "", +            "suffix": "" +          }, +          "furiganaSegments": [ +            { +              "text": "English", +              "furigana": "" +            } +          ] +        }, +        "glossaryLayoutMode": "default", +        "compactTags": false, +        "group": false, +        "merge": false, +        "modeTermKanji": false, +        "modeTermKana": false, +        "modeKanji": false, +        "compactGlossaries": false, +        "uniqueExpressions": [ +          "English" +        ], +        "uniqueReadings": [ +          "English" +        ], +        "pitches": [], +        "pitchCount": 0, +        "phoneticTranscriptions": [], +        "context": { +          "query": "query", +          "fullQuery": "fullQuery", +          "document": { +            "title": "title" +          } +        }, +        "media": {} +      } +    ] +  }, +  { +    "name": "Test text preprocessors - alphabeticWidthVariants 2", +    "noteDataList": [ +      { +        "marker": "{marker}", +        
"definition": { +          "type": "term", +          "id": 29, +          "source": "USB", +          "rawSource": "USB", +          "sourceTerm": "USB", +          "inflectionRuleChainCandidates": [ +            { +              "source": "algorithm", +              "inflectionRules": [] +            } +          ], +          "score": 1, +          "isPrimary": true, +          "sequence": 21, +          "dictionary": "Test Dictionary 2", +          "dictionaryOrder": { +            "index": 0, +            "priority": 0 +          }, +          "dictionaryNames": [ +            "Test Dictionary 2" +          ], +          "expression": "USB", +          "reading": "ユーエスビー", +          "expressions": [ +            { +              "sourceTerm": "USB", +              "expression": "USB", +              "reading": "ユーエスビー", +              "termTags": [], +              "frequencies": [], +              "pitches": [], +              "furiganaSegments": [ +                { +                  "text": "USB", +                  "furigana": "ユーエスビー" +                } +              ], +              "termFrequency": "normal", +              "wordClasses": [ +                "n" +              ] +            } +          ], +          "glossary": [ +            "USB definition" +          ], +          "definitionTags": [ +            { +              "name": "n", +              "category": "partOfSpeech", +              "notes": "noun", +              "order": 0, +              "score": 0, +              "dictionary": "Test Dictionary 2", +              "redundant": false +            } +          ], +          "termTags": [], +          "frequencies": [], +          "frequencyHarmonic": -1, +          "frequencyAverage": -1, +          "pitches": [], +          "phoneticTranscriptions": [], +          "sourceTermExactMatchCount": 1, +          "url": "url:", +          "cloze": { +            "sentence": "", +            "prefix": "", +            "body": "", +      
      "bodyKana": "", +            "suffix": "" +          }, +          "furiganaSegments": [ +            { +              "text": "USB", +              "furigana": "ユーエスビー" +            } +          ] +        }, +        "glossaryLayoutMode": "default", +        "compactTags": false, +        "group": false, +        "merge": false, +        "modeTermKanji": false, +        "modeTermKana": false, +        "modeKanji": false, +        "compactGlossaries": false, +        "uniqueExpressions": [ +          "USB" +        ], +        "uniqueReadings": [ +          "ユーエスビー" +        ], +        "pitches": [], +        "pitchCount": 0, +        "phoneticTranscriptions": [], +        "context": { +          "query": "query", +          "fullQuery": "fullQuery", +          "document": { +            "title": "title" +          } +        }, +        "media": {} +      } +    ] +  }, +  { +    "name": "Test text preprocessors - alphabeticToHiragana",      "noteDataList": [        {          "marker": "{marker}", @@ -32948,7 +33176,7 @@          "marker": "{marker}",          "definition": {            "type": "term", -          "id": 29, +          "id": 30,            "source": "ㅁㅏㅅㅣㄱㅓㄴㅏ",            "rawSource": "마시거나",            "sourceTerm": "마시다", @@ -32962,7 +33190,7 @@            ],            "score": 1,            "isPrimary": true, -          "sequence": 21, +          "sequence": 22,            "dictionary": "Test Dictionary 2",            "dictionaryOrder": {              "index": 0, diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json index bc26e0f9..ba251347 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -17211,7 +17211,169 @@      ]    },    { -    "name": "Test text preprocessors - convertAlphabeticCharacters", +    "name": "Test text preprocessors - alphabeticWidthVariants 1", +    "originalTextLength": 7, +    "dictionaryEntries": [ +      { +        "type": 
"term", +        "isPrimary": true, +        "inflectionRuleChainCandidates": [ +          { +            "source": "algorithm", +            "inflectionRules": [] +          } +        ], +        "score": 1, +        "frequencyOrder": 0, +        "dictionaryIndex": 0, +        "dictionaryPriority": 0, +        "sourceTermExactMatchCount": 1, +        "maxOriginalTextLength": 7, +        "headwords": [ +          { +            "index": 0, +            "term": "English", +            "reading": "English", +            "sources": [ +              { +                "originalText": "English", +                "transformedText": "English", +                "deinflectedText": "English", +                "matchType": "exact", +                "matchSource": "term", +                "isPrimary": true +              } +            ], +            "tags": [], +            "wordClasses": [ +              "n" +            ] +          } +        ], +        "definitions": [ +          { +            "index": 0, +            "headwordIndices": [ +              0 +            ], +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "id": 27, +            "score": 1, +            "frequencyOrder": 0, +            "sequences": [ +              19 +            ], +            "isPrimary": true, +            "tags": [ +              { +                "name": "n", +                "category": "partOfSpeech", +                "order": 0, +                "score": 0, +                "content": [ +                  "noun" +                ], +                "dictionaries": [ +                  "Test Dictionary 2" +                ], +                "redundant": false +              } +            ], +            "entries": [ +              "English definition" +            ] +          } +        ], +        "pronunciations": [], +        "frequencies": [] +      } +    ] +  }, +  { +    "name": "Test 
text preprocessors - alphabeticWidthVariants 2", +    "originalTextLength": 3, +    "dictionaryEntries": [ +      { +        "type": "term", +        "isPrimary": true, +        "inflectionRuleChainCandidates": [ +          { +            "source": "algorithm", +            "inflectionRules": [] +          } +        ], +        "score": 1, +        "frequencyOrder": 0, +        "dictionaryIndex": 0, +        "dictionaryPriority": 0, +        "sourceTermExactMatchCount": 1, +        "maxOriginalTextLength": 3, +        "headwords": [ +          { +            "index": 0, +            "term": "USB", +            "reading": "ユーエスビー", +            "sources": [ +              { +                "originalText": "USB", +                "transformedText": "USB", +                "deinflectedText": "USB", +                "matchType": "exact", +                "matchSource": "term", +                "isPrimary": true +              } +            ], +            "tags": [], +            "wordClasses": [ +              "n" +            ] +          } +        ], +        "definitions": [ +          { +            "index": 0, +            "headwordIndices": [ +              0 +            ], +            "dictionary": "Test Dictionary 2", +            "dictionaryIndex": 0, +            "dictionaryPriority": 0, +            "id": 29, +            "score": 1, +            "frequencyOrder": 0, +            "sequences": [ +              21 +            ], +            "isPrimary": true, +            "tags": [ +              { +                "name": "n", +                "category": "partOfSpeech", +                "order": 0, +                "score": 0, +                "content": [ +                  "noun" +                ], +                "dictionaries": [ +                  "Test Dictionary 2" +                ], +                "redundant": false +              } +            ], +            "entries": [ +              "USB definition" +            ] +          } +   
     ], +        "pronunciations": [], +        "frequencies": [] +      } +    ] +  }, +  { +    "name": "Test text preprocessors - alphabeticToHiragana",      "originalTextLength": 4,      "dictionaryEntries": [        { @@ -18656,11 +18818,11 @@              "dictionary": "Test Dictionary 2",              "dictionaryIndex": 0,              "dictionaryPriority": 0, -            "id": 29, +            "id": 30,              "score": 1,              "frequencyOrder": 0,              "sequences": [ -              21 +              22              ],              "isPrimary": true,              "tags": [ diff --git a/test/japanese-util.test.js b/test/japanese-util.test.js index bff51f85..63ac5cdf 100644 --- a/test/japanese-util.test.js +++ b/test/japanese-util.test.js @@ -178,17 +178,17 @@ describe('Japanese utility functions', () => {          });      }); -    describe('convertNumericToFullWidth', () => { +    describe('convertAlphanumericToFullWidth', () => {          /** @type {[string: string, expected: string][]} */          const data = [              ['0123456789', '0123456789'], -            ['abcdefghij', 'abcdefghij'], +            ['abcdefghij', 'abcdefghij'],              ['カタカナ', 'カタカナ'],              ['ひらがな', 'ひらがな']          ];          test.each(data)('%s -> %o', (string, expected) => { -            expect(jp.convertNumericToFullWidth(string)).toStrictEqual(expected); +            expect(jp.convertAlphanumericToFullWidth(string)).toStrictEqual(expected);          });      }); diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index aa961a6c..69ccec44 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -118,8 +118,8 @@ type AllTextProcessors = {      ja: {          pre: {              convertHalfWidthCharacters: TextProcessor<boolean>; -            convertNumericCharacters: TextProcessor<boolean>; -            convertAlphabeticCharacters: TextProcessor<boolean>; +            
alphabeticToHiragana: TextProcessor<boolean>; +            alphanumericWidthVariants: BidirectionalConversionPreprocessor;              convertHiraganaToKatakana: BidirectionalConversionPreprocessor;              collapseEmphaticSequences: TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>;          }; diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts index 48a66728..b69679b0 100644 --- a/types/ext/settings.d.ts +++ b/types/ext/settings.d.ts @@ -231,7 +231,7 @@ export type ScanningPreventMiddleMouseOptions = {  export type TranslationOptions = {      convertHalfWidthCharacters: TranslationConvertType;      convertNumericCharacters: TranslationConvertType; -    convertAlphabeticCharacters: TranslationConvertType; +    alphabeticToHiragana: TranslationConvertType;      convertHiraganaToKatakana: TranslationConvertType;      convertKatakanaToHiragana: TranslationConvertType;      collapseEmphaticSequences: TranslationCollapseEmphaticSequences; |