summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefanVukovic99 <stefanvukovic44@gmail.com>2024-05-22 22:45:39 +0200
committerGitHub <noreply@github.com>2024-05-22 20:45:39 +0000
commitd19b898792bffed8ab2d5724472e5b65a5f5b146 (patch)
treeb3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f
parent125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff)
[ja] add preprocessor for width of alphabetic characters (#964)
* add japanese text preprocessor for variants in width of alphabetic characters * try combining with numeric to improve performance * Update ext/js/language/ja/japanese.js Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com> * Update ext/js/language/ja/japanese.js Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com> * fix tests --------- Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com> Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
-rw-r--r--ext/js/language/ja/japanese-text-preprocessors.js31
-rw-r--r--ext/js/language/ja/japanese.js32
-rw-r--r--ext/js/language/language-descriptors.js12
-rw-r--r--test/data/anki-note-builder-test-results.json94
-rw-r--r--test/data/database-test-cases.json6
-rw-r--r--test/data/dictionaries/valid-dictionary1/term_bank_1.json3
-rw-r--r--test/data/translator-test-inputs.json28
-rw-r--r--test/data/translator-test-results-note-data1.json234
-rw-r--r--test/data/translator-test-results.json168
-rw-r--r--test/japanese-util.test.js6
-rw-r--r--types/ext/language-descriptors.d.ts4
-rw-r--r--types/ext/settings.d.ts2
12 files changed, 585 insertions, 35 deletions
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js
index b3d50817..32e45c83 100644
--- a/ext/js/language/ja/japanese-text-preprocessors.js
+++ b/ext/js/language/ja/japanese-text-preprocessors.js
@@ -19,10 +19,11 @@ import {basicTextProcessorOptions} from '../text-processors.js';
import {convertAlphabeticToKana} from './japanese-wanakana.js';
import {
collapseEmphaticSequences as collapseEmphaticSequencesFunction,
+ convertAlphanumericToFullWidth,
+ convertFullWidthAlphanumericToNormal,
convertHalfWidthKanaToFullWidth,
convertHiraganaToKatakana as convertHiraganaToKatakanaFunction,
- convertKatakanaToHiragana as convertKatakanaToHiraganaFunction,
- convertNumericToFullWidth
+ convertKatakanaToHiragana as convertKatakanaToHiraganaFunction
} from './japanese.js';
/** @type {import('language').TextProcessor<boolean>} */
@@ -33,16 +34,9 @@ export const convertHalfWidthCharacters = {
process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str)
};
-/** @type {import('language').TextProcessor<boolean>} */
-export const convertNumericCharacters = {
- name: 'Convert numeric characters to full width',
- description: '1234 → １２３４',
- options: basicTextProcessorOptions,
- process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)
-};
/** @type {import('language').TextProcessor<boolean>} */
-export const convertAlphabeticCharacters = {
+export const alphabeticToHiragana = {
name: 'Convert alphabetic characters to hiragana',
description: 'yomichan → よみちゃん',
options: basicTextProcessorOptions,
@@ -50,6 +44,23 @@ export const convertAlphabeticCharacters = {
};
/**
 * Japanese text preprocessor that switches letters and digits between their
 * full-width and ASCII forms. The setting selects the direction:
 * 'off' returns the text untouched, 'direct' delegates to
 * convertFullWidthAlphanumericToNormal, and 'inverse' delegates to
 * convertAlphanumericToFullWidth.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
+export const alphanumericWidthVariants = {
+ name: 'Convert between alphabetic width variants',
+ description: 'ｙｏｍｉｔａｎ → yomitan and vice versa',
+ options: ['off', 'direct', 'inverse'],
+ process: (str, setting) => {
+ switch (setting) {
+ case 'off':
+ return str;
+ case 'direct':
+ return convertFullWidthAlphanumericToNormal(str);
+ case 'inverse':
+ return convertAlphanumericToFullWidth(str);
+ default:
+ return str; // an unrecognised setting leaves the text as-is instead of yielding undefined
+ }
+ }
+};
+
+/** @type {import('language').BidirectionalConversionPreprocessor} */
export const convertHiraganaToKatakana = {
name: 'Convert hiragana to katakana',
description: 'よみちゃん → ヨミチャン and vice versa',
diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js
index 3507e5df..2200e077 100644
--- a/ext/js/language/ja/japanese.js
+++ b/ext/js/language/ja/japanese.js
@@ -15,6 +15,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
@@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) {
* @param {string} text
* @returns {string}
*/
-export function convertNumericToFullWidth(text) {
+export function convertAlphanumericToFullWidth(text) { // maps ASCII digits and Latin letters to their full-width forms
let result = '';
for (const char of text) { // for...of walks the string one code point at a time
let c = /** @type {number} */ (char.codePointAt(0));
if (c >= 0x30 && c <= 0x39) { // ['0', '9']
c += 0xff10 - 0x30; // 0xff10 = '０' (full-width zero)
- result += String.fromCodePoint(c);
- } else {
- result += char;
+ } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
+ c += 0xff21 - 0x41; // 0xff21 = 'Ａ' (full-width capital A)
+ } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
+ c += 0xff41 - 0x61; // 0xff41 = 'ａ' (full-width small a)
+ }
+ result += String.fromCodePoint(c); // code points outside the three ranges are appended unchanged
+ }
+ return result;
+}
+
+/**
+ * Replaces full-width digits (0xff10-0xff19) and full-width Latin letters
+ * (0xff21-0xff3a, 0xff41-0xff5a) with their ASCII counterparts. Anything
+ * outside those ranges is copied to the result unchanged.
+ * @param {string} text The text to convert.
+ * @returns {string} The converted text.
+ */
+export function convertFullWidthAlphanumericToNormal(text) {
+ let result = '';
+ const length = text.length;
+ for (let i = 0; i < length; i++) { // index loop: text[i] is a single UTF-16 code unit
+ let c = /** @type {number} */ (text[i].codePointAt(0));
+ if (c >= 0xff10 && c <= 0xff19) { // ['０', '９']
+ c -= 0xff10 - 0x30; // 0x30 = '0'
+ } else if (c >= 0xff21 && c <= 0xff3a) { // ['Ａ', 'Ｚ']
+ c -= 0xff21 - 0x41; // 0x41 = 'A'
+ } else if (c >= 0xff41 && c <= 0xff5a) { // ['ａ', 'ｚ']
+ c -= 0xff41 - 0x61; // 0x61 = 'a'
}
+ result += String.fromCodePoint(c); // appended whether or not a branch adjusted c
}
return result;
}
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 726842f1..baf53f81 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
import {germanTransforms} from './de/german-transforms.js';
import {englishTransforms} from './en/english-transforms.js';
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js';
+import {
+ alphabeticToHiragana,
+ alphanumericWidthVariants,
+ collapseEmphaticSequences,
+ convertHalfWidthCharacters,
+ convertHiraganaToKatakana
+} from './ja/japanese-text-preprocessors.js';
import {japaneseTransforms} from './ja/japanese-transforms.js';
import {isStringPartiallyJapanese} from './ja/japanese.js';
import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js';
@@ -143,8 +149,8 @@ const languageDescriptors = [
isTextLookupWorthy: isStringPartiallyJapanese,
textPreprocessors: {
convertHalfWidthCharacters,
- convertNumericCharacters,
- convertAlphabeticCharacters,
+ alphabeticToHiragana,
+ alphanumericWidthVariants,
convertHiraganaToKatakana,
collapseEmphaticSequences
},
diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json
index 1ec8d4cd..b4badf10 100644
--- a/test/data/anki-note-builder-test-results.json
+++ b/test/data/anki-note-builder-test-results.json
@@ -3874,7 +3874,99 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "results": [
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "English",
+ "cloze-body-kana": "English",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "English",
+ "frequencies": "",
+ "frequency-harmonic-rank": "9999999",
+ "frequency-harmonic-occurrence": "0",
+ "frequency-average-rank": "9999999",
+ "frequency-average-occurrence": "0",
+ "furigana": "English",
+ "furigana-plain": "English",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+ "glossary-brief": "<div style=\"text-align: left;\">English definition</div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+ "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>",
+ "glossary-first-brief": "<div style=\"text-align: left;\">English definition</div>",
+ "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "",
+ "pitch-accent-graphs": "",
+ "pitch-accent-graphs-jj": "",
+ "pitch-accent-positions": "",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "English",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixEnglishcloze-suffix",
+ "sentence-furigana": "cloze-prefixEnglishcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "results": [
+ {
+ "audio": "",
+ "clipboard-image": "",
+ "clipboard-text": "",
+ "cloze-body": "USB",
+ "cloze-body-kana": "USB",
+ "cloze-prefix": "cloze-prefix",
+ "cloze-suffix": "cloze-suffix",
+ "conjugation": "",
+ "dictionary": "Test Dictionary 2",
+ "document-title": "title",
+ "expression": "USB",
+ "frequencies": "",
+ "frequency-harmonic-rank": "9999999",
+ "frequency-harmonic-occurrence": "0",
+ "frequency-average-rank": "9999999",
+ "frequency-average-occurrence": "0",
+ "furigana": "<ruby>USB<rt>ユーエスビー</rt></ruby>",
+ "furigana-plain": "USB[ユーエスビー]",
+ "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>",
+ "glossary-brief": "<div style=\"text-align: left;\">USB definition</div>",
+ "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>",
+ "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>",
+ "glossary-first-brief": "<div style=\"text-align: left;\">USB definition</div>",
+ "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>",
+ "part-of-speech": "Noun",
+ "pitch-accents": "",
+ "pitch-accent-graphs": "",
+ "pitch-accent-graphs-jj": "",
+ "pitch-accent-positions": "",
+ "pitch-accent-categories": "",
+ "phonetic-transcriptions": "",
+ "reading": "ユーエスビー",
+ "screenshot": "",
+ "search-query": "fullQuery",
+ "selection-text": "",
+ "sentence": "cloze-prefixUSBcloze-suffix",
+ "sentence-furigana": "cloze-prefixUSBcloze-suffix",
+ "tags": "n",
+ "url": "<a href=\"url:\">url:</a>"
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"results": [
{
"audio": "",
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json
index 054ede34..04b98aa9 100644
--- a/test/data/database-test-cases.json
+++ b/test/data/database-test-cases.json
@@ -27,7 +27,7 @@
"ipa": 1
},
"terms": {
- "total": 30
+ "total": 31
}
}
},
@@ -36,7 +36,7 @@
{
"kanji": 2,
"kanjiMeta": 6,
- "terms": 30,
+ "terms": 31,
"termMeta": 39,
"tagMeta": 15,
"media": 6
@@ -45,7 +45,7 @@
"total": {
"kanji": 2,
"kanjiMeta": 6,
- "terms": 30,
+ "terms": 31,
"termMeta": 39,
"tagMeta": 15,
"media": 6
diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
index a362de23..0a635b84 100644
--- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json
+++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json
@@ -344,5 +344,6 @@
["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""],
["English", "", "n", "n", 1, ["English definition"], 19, ""],
["language", "", "n", "n", 1, ["language definition"], 20, ""],
- ["마시다", "", "v", "v", 1, ["masida definition"], 21, ""]
+ ["USB", "ユーエスビー", "n", "n", 1, ["USB definition"], 21, ""],
+ ["마시다", "", "v", "v", 1, ["masida definition"], 22, ""]
]
diff --git a/test/data/translator-test-inputs.json b/test/data/translator-test-inputs.json
index 9f508a1d..ed028273 100644
--- a/test/data/translator-test-inputs.json
+++ b/test/data/translator-test-inputs.json
@@ -369,7 +369,33 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "func": "findTerms",
+ "mode": "split",
+ "text": "English",
+ "options": [
+ "default",
+ {
+ "type": "terms",
+ "removeNonJapaneseCharacters": false
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "func": "findTerms",
+ "mode": "split",
+ "text": "USB",
+ "options": [
+ "default",
+ {
+ "type": "terms",
+ "removeNonJapaneseCharacters": false
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"func": "findTerms",
"mode": "split",
"text": "utsu",
diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json
index 0782c8f5..11c9d078 100644
--- a/test/data/translator-test-results-note-data1.json
+++ b/test/data/translator-test-results-note-data1.json
@@ -30533,7 +30533,235 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "noteDataList": [
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 27,
+ "source": "English",
+ "rawSource": "English",
+ "sourceTerm": "English",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 19,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "English",
+ "reading": "English",
+ "expressions": [
+ {
+ "sourceTerm": "English",
+ "expression": "English",
+ "reading": "English",
+ "termTags": [],
+ "frequencies": [],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "English",
+ "furigana": ""
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "English definition"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [],
+ "frequencies": [],
+ "frequencyHarmonic": -1,
+ "frequencyAverage": -1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 1,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "English",
+ "furigana": ""
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "English"
+ ],
+ "uniqueReadings": [
+ "English"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "noteDataList": [
+ {
+ "marker": "{marker}",
+ "definition": {
+ "type": "term",
+ "id": 29,
+ "source": "USB",
+ "rawSource": "USB",
+ "sourceTerm": "USB",
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "isPrimary": true,
+ "sequence": 21,
+ "dictionary": "Test Dictionary 2",
+ "dictionaryOrder": {
+ "index": 0,
+ "priority": 0
+ },
+ "dictionaryNames": [
+ "Test Dictionary 2"
+ ],
+ "expression": "USB",
+ "reading": "ユーエスビー",
+ "expressions": [
+ {
+ "sourceTerm": "USB",
+ "expression": "USB",
+ "reading": "ユーエスビー",
+ "termTags": [],
+ "frequencies": [],
+ "pitches": [],
+ "furiganaSegments": [
+ {
+ "text": "USB",
+ "furigana": "ユーエスビー"
+ }
+ ],
+ "termFrequency": "normal",
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "glossary": [
+ "USB definition"
+ ],
+ "definitionTags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "notes": "noun",
+ "order": 0,
+ "score": 0,
+ "dictionary": "Test Dictionary 2",
+ "redundant": false
+ }
+ ],
+ "termTags": [],
+ "frequencies": [],
+ "frequencyHarmonic": -1,
+ "frequencyAverage": -1,
+ "pitches": [],
+ "phoneticTranscriptions": [],
+ "sourceTermExactMatchCount": 1,
+ "url": "url:",
+ "cloze": {
+ "sentence": "",
+ "prefix": "",
+ "body": "",
+ "bodyKana": "",
+ "suffix": ""
+ },
+ "furiganaSegments": [
+ {
+ "text": "USB",
+ "furigana": "ユーエスビー"
+ }
+ ]
+ },
+ "glossaryLayoutMode": "default",
+ "compactTags": false,
+ "group": false,
+ "merge": false,
+ "modeTermKanji": false,
+ "modeTermKana": false,
+ "modeKanji": false,
+ "compactGlossaries": false,
+ "uniqueExpressions": [
+ "USB"
+ ],
+ "uniqueReadings": [
+ "ユーエスビー"
+ ],
+ "pitches": [],
+ "pitchCount": 0,
+ "phoneticTranscriptions": [],
+ "context": {
+ "query": "query",
+ "fullQuery": "fullQuery",
+ "document": {
+ "title": "title"
+ }
+ },
+ "media": {}
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"noteDataList": [
{
"marker": "{marker}",
@@ -32948,7 +33176,7 @@
"marker": "{marker}",
"definition": {
"type": "term",
- "id": 29,
+ "id": 30,
"source": "ㅁㅏㅅㅣㄱㅓㄴㅏ",
"rawSource": "마시거나",
"sourceTerm": "마시다",
@@ -32962,7 +33190,7 @@
],
"score": 1,
"isPrimary": true,
- "sequence": 21,
+ "sequence": 22,
"dictionary": "Test Dictionary 2",
"dictionaryOrder": {
"index": 0,
diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json
index bc26e0f9..ba251347 100644
--- a/test/data/translator-test-results.json
+++ b/test/data/translator-test-results.json
@@ -17211,7 +17211,169 @@
]
},
{
- "name": "Test text preprocessors - convertAlphabeticCharacters",
+ "name": "Test text preprocessors - alphabeticWidthVariants 1",
+ "originalTextLength": 7,
+ "dictionaryEntries": [
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 1,
+ "maxOriginalTextLength": 7,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "English",
+ "reading": "English",
+ "sources": [
+ {
+ "originalText": "English",
+ "transformedText": "English",
+ "deinflectedText": "English",
+ "matchType": "exact",
+ "matchSource": "term",
+ "isPrimary": true
+ }
+ ],
+ "tags": [],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 27,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 19
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "English definition"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": []
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticWidthVariants 2",
+ "originalTextLength": 3,
+ "dictionaryEntries": [
+ {
+ "type": "term",
+ "isPrimary": true,
+ "inflectionRuleChainCandidates": [
+ {
+ "source": "algorithm",
+ "inflectionRules": []
+ }
+ ],
+ "score": 1,
+ "frequencyOrder": 0,
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "sourceTermExactMatchCount": 1,
+ "maxOriginalTextLength": 3,
+ "headwords": [
+ {
+ "index": 0,
+ "term": "USB",
+ "reading": "ユーエスビー",
+ "sources": [
+ {
+ "originalText": "USB",
+ "transformedText": "USB",
+ "deinflectedText": "USB",
+ "matchType": "exact",
+ "matchSource": "term",
+ "isPrimary": true
+ }
+ ],
+ "tags": [],
+ "wordClasses": [
+ "n"
+ ]
+ }
+ ],
+ "definitions": [
+ {
+ "index": 0,
+ "headwordIndices": [
+ 0
+ ],
+ "dictionary": "Test Dictionary 2",
+ "dictionaryIndex": 0,
+ "dictionaryPriority": 0,
+ "id": 29,
+ "score": 1,
+ "frequencyOrder": 0,
+ "sequences": [
+ 21
+ ],
+ "isPrimary": true,
+ "tags": [
+ {
+ "name": "n",
+ "category": "partOfSpeech",
+ "order": 0,
+ "score": 0,
+ "content": [
+ "noun"
+ ],
+ "dictionaries": [
+ "Test Dictionary 2"
+ ],
+ "redundant": false
+ }
+ ],
+ "entries": [
+ "USB definition"
+ ]
+ }
+ ],
+ "pronunciations": [],
+ "frequencies": []
+ }
+ ]
+ },
+ {
+ "name": "Test text preprocessors - alphabeticToHiragana",
"originalTextLength": 4,
"dictionaryEntries": [
{
@@ -18656,11 +18818,11 @@
"dictionary": "Test Dictionary 2",
"dictionaryIndex": 0,
"dictionaryPriority": 0,
- "id": 29,
+ "id": 30,
"score": 1,
"frequencyOrder": 0,
"sequences": [
- 21
+ 22
],
"isPrimary": true,
"tags": [
diff --git a/test/japanese-util.test.js b/test/japanese-util.test.js
index bff51f85..63ac5cdf 100644
--- a/test/japanese-util.test.js
+++ b/test/japanese-util.test.js
@@ -178,17 +178,17 @@ describe('Japanese utility functions', () => {
});
});
- describe('convertNumericToFullWidth', () => {
+ describe('convertAlphanumericToFullWidth', () => {
// Each row is [input, expected output of convertAlphanumericToFullWidth].
/** @type {[string: string, expected: string][]} */
const data = [
['0123456789', '０１２３４５６７８９'], // ASCII digits become full-width digits
- ['abcdefghij', 'abcdefghij'],
+ ['abcdefghij', 'ａｂｃｄｅｆｇｈｉｊ'], // ASCII letters become full-width letters
['カタカナ', 'カタカナ'], // kana is outside the converted ranges and stays as-is
['ひらがな', 'ひらがな']
];
test.each(data)('%s -> %o', (string, expected) => {
- expect(jp.convertNumericToFullWidth(string)).toStrictEqual(expected);
+ expect(jp.convertAlphanumericToFullWidth(string)).toStrictEqual(expected);
});
});
diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts
index aa961a6c..69ccec44 100644
--- a/types/ext/language-descriptors.d.ts
+++ b/types/ext/language-descriptors.d.ts
@@ -118,8 +118,8 @@ type AllTextProcessors = {
ja: {
pre: {
convertHalfWidthCharacters: TextProcessor<boolean>;
- convertNumericCharacters: TextProcessor<boolean>;
- convertAlphabeticCharacters: TextProcessor<boolean>;
+ alphabeticToHiragana: TextProcessor<boolean>;
+ alphanumericWidthVariants: BidirectionalConversionPreprocessor;
convertHiraganaToKatakana: BidirectionalConversionPreprocessor;
collapseEmphaticSequences: TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>;
};
diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts
index 48a66728..b69679b0 100644
--- a/types/ext/settings.d.ts
+++ b/types/ext/settings.d.ts
@@ -231,7 +231,7 @@ export type ScanningPreventMiddleMouseOptions = {
export type TranslationOptions = {
convertHalfWidthCharacters: TranslationConvertType;
convertNumericCharacters: TranslationConvertType;
- convertAlphabeticCharacters: TranslationConvertType;
+ alphabeticToHiragana: TranslationConvertType;
convertHiraganaToKatakana: TranslationConvertType;
convertKatakanaToHiragana: TranslationConvertType;
collapseEmphaticSequences: TranslationCollapseEmphaticSequences;