[ja] add preprocessor for width of alphabetic characters (#964)

* add Japanese text preprocessor for width variants of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
  Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
  Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
  Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
  Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests

---------

Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
author: StefanVukovic99 <stefanvukovic44@gmail.com> 2024-05-22 22:45:39 +0200
committer: GitHub <noreply@github.com> 2024-05-22 20:45:39 +0000
commit: d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch)
tree: b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f
parent: 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff)
12 files changed, 585 insertions, 35 deletions
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index b3d50817..32e45c83 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -19,10 +19,11 @@ import {basicTextProcessorOptions} from '../text-processors.js';
 import {convertAlphabeticToKana} from './japanese-wanakana.js';
 import {
     collapseEmphaticSequences as collapseEmphaticSequencesFunction,
+    convertAlphanumericToFullWidth,
+    convertFullWidthAlphanumericToNormal,
     convertHalfWidthKanaToFullWidth,
     convertHiraganaToKatakana as convertHiraganaToKatakanaFunction,
-    convertKatakanaToHiragana as convertKatakanaToHiraganaFunction,
-    convertNumericToFullWidth
+    convertKatakanaToHiragana as convertKatakanaToHiraganaFunction
 } from './japanese.js';
 
 /** @type {import('language').TextProcessor<boolean>} */
@@ -33,16 +34,9 @@ export const convertHalfWidthCharacters = {
     process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str)
 };
 
-/** @type {import('language').TextProcessor<boolean>} */
-export const convertNumericCharacters = {
-    name: 'Convert numeric characters to full width',
-    description: '1234 → １２３４',
-    options: basicTextProcessorOptions,
-    process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)
-};
 
 /** @type {import('language').TextProcessor<boolean>} */
-export const convertAlphabeticCharacters = {
+export const alphabeticToHiragana = {
     name: 'Convert alphabetic characters to hiragana',
     description: 'yomichan → よみちゃん',
     options: basicTextProcessorOptions,
@@ -50,6 +44,23 @@ export const convertAlphabeticCharacters = {
 };
 
 /** @type {import('language').BidirectionalConversionPreprocessor} */
+export const alphanumericWidthVariants = { // preprocessor switching digits/Latin letters between full-width and ASCII forms
+    name: 'Convert between alphabetic width variants', // NOTE(review): label says "alphabetic", but both helpers below also convert digits
+    description: 'ｙｏｍｉｔａｎ → yomitan and vice versa',
+    options: ['off', 'direct', 'inverse'],
+    process: (str, setting) => {
+        switch (setting) { // no default branch: a setting outside `options` makes this return undefined
+            case 'off': // leave the text untouched
+                return str;
+            case 'direct': // full width → ASCII (the direction shown in `description`)
+                return convertFullWidthAlphanumericToNormal(str);
+            case 'inverse': // ASCII → full width
+                return convertAlphanumericToFullWidth(str);
+        }
+    }
+};
+
+/** @type {import('language').BidirectionalConversionPreprocessor} */
 export const convertHiraganaToKatakana = {
     name: 'Convert hiragana to katakana',
     description: 'よみちゃん → ヨミチャン and vice versa',
diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js
index 3507e5df..2200e077 100644
--- a/ext/js/language/ja/japanese.js
+++ b/ext/js/language/ja/japanese.js
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
+
 const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
 const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
 const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
@@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) {
  * @param {string} text
  * @returns {string}
  */
-export function convertNumericToFullWidth(text) {
+export function convertAlphanumericToFullWidth(text) { // maps ASCII digits and Latin letters to their full-width forms
     let result = '';
     for (const char of text) {
         let c = /** @type {number} */ (char.codePointAt(0));
         if (c >= 0x30 && c <= 0x39) { // ['0', '9']
             c += 0xff10 - 0x30; // 0xff10 = '0' full width
-            result += String.fromCodePoint(c);
-        } else {
-            result += char;
+        } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
+            c += 0xff21 - 0x41; // 0xff21 = 'A' full width
+        } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
+            c += 0xff41 - 0x61; // 0xff41 = 'a' full width
+        } // any other code point is left as-is
+        result += String.fromCodePoint(c); // single append for both shifted and unchanged code points
+    }
+    return result;
+}
+
+/**
+ * @param {string} text Text that may contain full-width digits (０-９) or full-width Latin letters (Ａ-Ｚ, ａ-ｚ).
+ * @returns {string} Copy of `text` with those characters replaced by their ASCII counterparts; everything else is unchanged.
+ */
+export function convertFullWidthAlphanumericToNormal(text) {
+    let result = '';
+    const length = text.length;
+    for (let i = 0; i < length; i++) { // walks UTF-16 code units; all three converted ranges lie in the BMP
+        let c = /** @type {number} */ (text[i].codePointAt(0));
+        if (c >= 0xff10 && c <= 0xff19) { // ['０', '９']
+            c -= 0xff10 - 0x30; // shift down so that 0xff10 lands on 0x30 = '0'
+        } else if (c >= 0xff21 && c <= 0xff3a) { // ['Ａ', 'Ｚ']
+            c -= 0xff21 - 0x41; // shift down so that 0xff21 lands on 0x41 = 'A'
+        } else if (c >= 0xff41 && c <= 0xff5a) { // ['ａ', 'ｚ']
+            c -= 0xff41 - 0x61; // shift down so that 0xff41 lands on 0x61 = 'a'
         }
+        result += String.fromCodePoint(c); // code units outside the ranges are re-emitted as they were read
     }
     return result;
 }
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 726842f1..baf53f81 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
 import {eszettPreprocessor} from './de/german-text-preprocessors.js';
 import {germanTransforms} from './de/german-transforms.js';
 import {englishTransforms} from './en/english-transforms.js';
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {
+    alphabeticToHiragana,
+    alphanumericWidthVariants,
+    collapseEmphaticSequences,
+    convertHalfWidthCharacters,
+    convertHiraganaToKatakana
+} from './ja/japanese-text-preprocessors.js';
 import {japaneseTransforms} from './ja/japanese-transforms.js';
 import {isStringPartiallyJapanese} from './ja/japanese.js';
 import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js';
@@ -143,8 +149,8 @@ const languageDescriptors = [
         isTextLookupWorthy: isStringPartiallyJapanese,
         textPreprocessors: {
             convertHalfWidthCharacters,
-            convertNumericCharacters,
-            convertAlphabeticCharacters,
+            alphabeticToHiragana,
+            alphanumericWidthVariants,
             convertHiraganaToKatakana,
             collapseEmphaticSequences
         },
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json
index 1ec8d4cd..b4badf10 100644
--- a/test/data/anki-note-builder-test-results.json
+++ b/test/data/anki-note-builder-test-results.json
@@ -3874,7 +3874,99 @@
     ]
   },
   {
-    "name": "Test text preprocessors - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - alphabeticWidthVariants 1",
+    "results": [
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "Ｅｎｇｌｉｓｈ",
+        "cloze-body-kana": "Ｅｎｇｌｉｓｈ",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "English",
+        "frequencies": "",
+        "frequency-harmonic-rank": "9999999",
+        "frequency-harmonic-occurrence": "0",
+        "frequency-average-rank": "9999999",
+        "frequency-average-occurrence": "0",
+        "furigana": "English",
+        "furigana-plain": "English",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+        "glossary-brief": "<div style=\"text-align: left;\">English definition</div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+        "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+        "glossary-first-brief": "<div style=\"text-align: left;\">English definition</div>",
+        "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "",
+        "pitch-accent-graphs": "",
+        "pitch-accent-graphs-jj": "",
+        "pitch-accent-positions": "",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "English",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixＥｎｇｌｉｓｈcloze-suffix",
+        "sentence-furigana": "cloze-prefixＥｎｇｌｉｓｈcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - alphabeticWidthVariants 2",
+    "results": [
+      {
+        "audio": "",
+        "clipboard-image": "",
+        "clipboard-text": "",
+        "cloze-body": "USB",
+        "cloze-body-kana": "USB",
+        "cloze-prefix": "cloze-prefix",
+        "cloze-suffix": "cloze-suffix",
+        "conjugation": "",
+        "dictionary": "Test Dictionary 2",
+        "document-title": "title",
+        "expression": "ＵＳＢ",
+        "frequencies": "",
+        "frequency-harmonic-rank": "9999999",
+        "frequency-harmonic-occurrence": "0",
+        "frequency-average-rank": "9999999",
+        "frequency-average-occurrence": "0",
+        "furigana": "<ruby>ＵＳＢ<rt>ユーエスビー</rt></ruby>",
+        "furigana-plain": "ＵＳＢ[ユーエスビー]",
+        "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> ＵＳＢ definition</div>",
+        "glossary-brief": "<div style=\"text-align: left;\">ＵＳＢ definition</div>",
+        "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> ＵＳＢ definition</div>",
+        "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> ＵＳＢ definition</div>",
+        "glossary-first-brief": "<div style=\"text-align: left;\">ＵＳＢ definition</div>",
+        "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> ＵＳＢ definition</div>",
+        "part-of-speech": "Noun",
+        "pitch-accents": "",
+        "pitch-accent-graphs": "",
+        "pitch-accent-graphs-jj": "",
+        "pitch-accent-positions": "",
+        "pitch-accent-categories": "",
+        "phonetic-transcriptions": "",
+        "reading": "ユーエスビー",
+        "screenshot": "",
+        "search-query": "fullQuery",
+        "selection-text": "",
+        "sentence": "cloze-prefixUSBcloze-suffix",
+        "sentence-furigana": "cloze-prefixUSBcloze-suffix",
+        "tags": "n",
+        "url": "<a href=\"url:\">url:</a>"
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - alphabeticToHiragana",
     "results": [
       {
         "audio": "",
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json
index 054ede34..04b98aa9 100644
--- a/test/data/database-test-cases.json
+++ b/test/data/database-test-cases.json
@@ -27,7 +27,7 @@
         "ipa": 1
       },
       "terms": {
-        "total": 30
+        "total": 31
       }
     }
   },
@@ -36,7 +36,7 @@
       {
         "kanji": 2,
         "kanjiMeta": 6,
-        "terms": 30,
+        "terms": 31,
         "termMeta": 39,
         "tagMeta": 15,
         "media": 6
@@ -45,7 +45,7 @@
     "total": {
       "kanji": 2,
       "kanjiMeta": 6,
-      "terms": 30,
+      "terms": 31,
       "termMeta": 39,
       "tagMeta": 15,
       "media": 6
diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
index a362de23..0a635b84 100644
--- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json
+++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
@@ -344,5 +344,6 @@
     ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],
     ["English", "", "n", "n", 1, ["English definition"], 19, ""],
     ["language", "", "n", "n", 1, ["language definition"], 20, ""],
-    ["마시다", "", "v", "v", 1, ["masida definition"], 21, ""]
+    ["ＵＳＢ", "ユーエスビー", "n", "n", 1, ["ＵＳＢ definition"], 21, ""],
+    ["마시다", "", "v", "v", 1, ["masida definition"], 22, ""]
 ]
diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json
index 9f508a1d..ed028273 100644
--- a/test/data/translator-test-inputs.json
+++ b/test/data/translator-test-inputs.json
@@ -369,7 +369,33 @@
             ]
         },
         {
-            "name": "Test text preprocessors - convertAlphabeticCharacters",
+            "name": "Test text preprocessors - alphabeticWidthVariants 1",
+            "func": "findTerms",
+            "mode": "split",
+            "text": "Ｅｎｇｌｉｓｈ",
+            "options": [
+                "default",
+                {
+                    "type": "terms",
+                    "removeNonJapaneseCharacters": false
+                }
+            ]
+        },
+        {
+            "name": "Test text preprocessors - alphabeticWidthVariants 2",
+            "func": "findTerms",
+            "mode": "split",
+            "text": "USB",
+            "options": [
+                "default",
+                {
+                    "type": "terms",
+                    "removeNonJapaneseCharacters": false
+                }
+            ]
+        },
+        {
+            "name": "Test text preprocessors - alphabeticToHiragana",
             "func": "findTerms",
             "mode": "split",
             "text": "utsu",
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json
index 0782c8f5..11c9d078 100644
--- a/test/data/translator-test-results-note-data1.json
+++ b/test/data/translator-test-results-note-data1.json
@@ -30533,7 +30533,235 @@
     ]
   },
   {
-    "name": "Test text preprocessors - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - alphabeticWidthVariants 1",
+    "noteDataList": [
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 27,
+          "source": "English",
+          "rawSource": "Ｅｎｇｌｉｓｈ",
+          "sourceTerm": "English",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 19,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "English",
+          "reading": "English",
+          "expressions": [
+            {
+              "sourceTerm": "English",
+              "expression": "English",
+              "reading": "English",
+              "termTags": [],
+              "frequencies": [],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "English",
+                  "furigana": ""
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "English definition"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [],
+          "frequencies": [],
+          "frequencyHarmonic": -1,
+          "frequencyAverage": -1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 1,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "English",
+              "furigana": ""
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "English"
+        ],
+        "uniqueReadings": [
+          "English"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - alphabeticWidthVariants 2",
+    "noteDataList": [
+      {
+        "marker": "{marker}",
+        "definition": {
+          "type": "term",
+          "id": 29,
+          "source": "ＵＳＢ",
+          "rawSource": "USB",
+          "sourceTerm": "ＵＳＢ",
+          "inflectionRuleChainCandidates": [
+            {
+              "source": "algorithm",
+              "inflectionRules": []
+            }
+          ],
+          "score": 1,
+          "isPrimary": true,
+          "sequence": 21,
+          "dictionary": "Test Dictionary 2",
+          "dictionaryOrder": {
+            "index": 0,
+            "priority": 0
+          },
+          "dictionaryNames": [
+            "Test Dictionary 2"
+          ],
+          "expression": "ＵＳＢ",
+          "reading": "ユーエスビー",
+          "expressions": [
+            {
+              "sourceTerm": "ＵＳＢ",
+              "expression": "ＵＳＢ",
+              "reading": "ユーエスビー",
+              "termTags": [],
+              "frequencies": [],
+              "pitches": [],
+              "furiganaSegments": [
+                {
+                  "text": "ＵＳＢ",
+                  "furigana": "ユーエスビー"
+                }
+              ],
+              "termFrequency": "normal",
+              "wordClasses": [
+                "n"
+              ]
+            }
+          ],
+          "glossary": [
+            "ＵＳＢ definition"
+          ],
+          "definitionTags": [
+            {
+              "name": "n",
+              "category": "partOfSpeech",
+              "notes": "noun",
+              "order": 0,
+              "score": 0,
+              "dictionary": "Test Dictionary 2",
+              "redundant": false
+            }
+          ],
+          "termTags": [],
+          "frequencies": [],
+          "frequencyHarmonic": -1,
+          "frequencyAverage": -1,
+          "pitches": [],
+          "phoneticTranscriptions": [],
+          "sourceTermExactMatchCount": 1,
+          "url": "url:",
+          "cloze": {
+            "sentence": "",
+            "prefix": "",
+            "body": "",
+            "bodyKana": "",
+            "suffix": ""
+          },
+          "furiganaSegments": [
+            {
+              "text": "ＵＳＢ",
+              "furigana": "ユーエスビー"
+            }
+          ]
+        },
+        "glossaryLayoutMode": "default",
+        "compactTags": false,
+        "group": false,
+        "merge": false,
+        "modeTermKanji": false,
+        "modeTermKana": false,
+        "modeKanji": false,
+        "compactGlossaries": false,
+        "uniqueExpressions": [
+          "ＵＳＢ"
+        ],
+        "uniqueReadings": [
+          "ユーエスビー"
+        ],
+        "pitches": [],
+        "pitchCount": 0,
+        "phoneticTranscriptions": [],
+        "context": {
+          "query": "query",
+          "fullQuery": "fullQuery",
+          "document": {
+            "title": "title"
+          }
+        },
+        "media": {}
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - alphabeticToHiragana",
     "noteDataList": [
       {
         "marker": "{marker}",
@@ -32948,7 +33176,7 @@
         "marker": "{marker}",
         "definition": {
           "type": "term",
-          "id": 29,
+          "id": 30,
           "source": "ㅁㅏㅅㅣㄱㅓㄴㅏ",
           "rawSource": "마시거나",
           "sourceTerm": "마시다",
@@ -32962,7 +33190,7 @@
           ],
           "score": 1,
           "isPrimary": true,
-          "sequence": 21,
+          "sequence": 22,
           "dictionary": "Test Dictionary 2",
           "dictionaryOrder": {
             "index": 0,
diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json
index bc26e0f9..ba251347 100644
--- a/test/data/translator-test-results.json
+++ b/test/data/translator-test-results.json
@@ -17211,7 +17211,169 @@
     ]
   },
   {
-    "name": "Test text preprocessors - convertAlphabeticCharacters",
+    "name": "Test text preprocessors - alphabeticWidthVariants 1",
+    "originalTextLength": 7,
+    "dictionaryEntries": [
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 1,
+        "maxOriginalTextLength": 7,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "English",
+            "reading": "English",
+            "sources": [
+              {
+                "originalText": "Ｅｎｇｌｉｓｈ",
+                "transformedText": "English",
+                "deinflectedText": "English",
+                "matchType": "exact",
+                "matchSource": "term",
+                "isPrimary": true
+              }
+            ],
+            "tags": [],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 27,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              19
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "English definition"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": []
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - alphabeticWidthVariants 2",
+    "originalTextLength": 3,
+    "dictionaryEntries": [
+      {
+        "type": "term",
+        "isPrimary": true,
+        "inflectionRuleChainCandidates": [
+          {
+            "source": "algorithm",
+            "inflectionRules": []
+          }
+        ],
+        "score": 1,
+        "frequencyOrder": 0,
+        "dictionaryIndex": 0,
+        "dictionaryPriority": 0,
+        "sourceTermExactMatchCount": 1,
+        "maxOriginalTextLength": 3,
+        "headwords": [
+          {
+            "index": 0,
+            "term": "ＵＳＢ",
+            "reading": "ユーエスビー",
+            "sources": [
+              {
+                "originalText": "USB",
+                "transformedText": "ＵＳＢ",
+                "deinflectedText": "ＵＳＢ",
+                "matchType": "exact",
+                "matchSource": "term",
+                "isPrimary": true
+              }
+            ],
+            "tags": [],
+            "wordClasses": [
+              "n"
+            ]
+          }
+        ],
+        "definitions": [
+          {
+            "index": 0,
+            "headwordIndices": [
+              0
+            ],
+            "dictionary": "Test Dictionary 2",
+            "dictionaryIndex": 0,
+            "dictionaryPriority": 0,
+            "id": 29,
+            "score": 1,
+            "frequencyOrder": 0,
+            "sequences": [
+              21
+            ],
+            "isPrimary": true,
+            "tags": [
+              {
+                "name": "n",
+                "category": "partOfSpeech",
+                "order": 0,
+                "score": 0,
+                "content": [
+                  "noun"
+                ],
+                "dictionaries": [
+                  "Test Dictionary 2"
+                ],
+                "redundant": false
+              }
+            ],
+            "entries": [
+              "ＵＳＢ definition"
+            ]
+          }
+        ],
+        "pronunciations": [],
+        "frequencies": []
+      }
+    ]
+  },
+  {
+    "name": "Test text preprocessors - alphabeticToHiragana",
     "originalTextLength": 4,
     "dictionaryEntries": [
       {
@@ -18656,11 +18818,11 @@
             "dictionary": "Test Dictionary 2",
             "dictionaryIndex": 0,
             "dictionaryPriority": 0,
-            "id": 29,
+            "id": 30,
             "score": 1,
             "frequencyOrder": 0,
             "sequences": [
-              21
+              22
             ],
             "isPrimary": true,
             "tags": [
diff --git a/test/japanese-util.test.js b/test/japanese-util.test.js
index bff51f85..63ac5cdf 100644
--- a/test/japanese-util.test.js
+++ b/test/japanese-util.test.js
@@ -178,17 +178,17 @@ describe('Japanese utility functions', () => {
         });
     });
 
-    describe('convertNumericToFullWidth', () => {
+    describe('convertAlphanumericToFullWidth', () => {
         /** @type {[string: string, expected: string][]} */
         const data = [
             ['0123456789', '０１２３４５６７８９'],
-            ['abcdefghij', 'abcdefghij'],
+            ['abcdefghij', 'ａｂｃｄｅｆｇｈｉｊ'],
             ['カタカナ', 'カタカナ'],
             ['ひらがな', 'ひらがな']
         ];
 
         test.each(data)('%s -> %o', (string, expected) => {
-            expect(jp.convertNumericToFullWidth(string)).toStrictEqual(expected);
+            expect(jp.convertAlphanumericToFullWidth(string)).toStrictEqual(expected);
         });
     });
 
diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts
index aa961a6c..69ccec44 100644
--- a/types/ext/language-descriptors.d.ts
+++ b/types/ext/language-descriptors.d.ts
@@ -118,8 +118,8 @@ type AllTextProcessors = {
     ja: {
         pre: {
             convertHalfWidthCharacters: TextProcessor<boolean>;
-            convertNumericCharacters: TextProcessor<boolean>;
-            convertAlphabeticCharacters: TextProcessor<boolean>;
+            alphabeticToHiragana: TextProcessor<boolean>;
+            alphanumericWidthVariants: BidirectionalConversionPreprocessor;
             convertHiraganaToKatakana: BidirectionalConversionPreprocessor;
             collapseEmphaticSequences: TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>;
         };
diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts
index 48a66728..b69679b0 100644
--- a/types/ext/settings.d.ts
+++ b/types/ext/settings.d.ts
@@ -231,7 +231,7 @@ export type ScanningPreventMiddleMouseOptions = {
 export type TranslationOptions = {
     convertHalfWidthCharacters: TranslationConvertType;
     convertNumericCharacters: TranslationConvertType;
-    convertAlphabeticCharacters: TranslationConvertType;
+    alphabeticToHiragana: TranslationConvertType;
     convertHiraganaToKatakana: TranslationConvertType;
     convertKatakanaToHiragana: TranslationConvertType;
     collapseEmphaticSequences: TranslationCollapseEmphaticSequences;
author	StefanVukovic99 <stefanvukovic44@gmail.com>	2024-05-22 22:45:39 +0200
committer	GitHub <noreply@github.com>	2024-05-22 20:45:39 +0000
commit	d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch)
tree	b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f
parent	125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff)