diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-05-22 22:45:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 20:45:39 +0000 |
commit | d19b898792bffed8ab2d5724472e5b65a5f5b146 (patch) | |
tree | b3e0d5111d748dfcc5d74d9dbf68e79193fa6a7f | |
parent | 125cde3d98c18b08e71e075b4a9776fc7bd4b4a0 (diff) |
[ja] add preprocessor for width of alphabetic characters (#964)
* add japanese text preprocessor for variants in width of alphabetic characters
* try combining with numeric to improve performance
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* Update ext/js/language/ja/japanese.js
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
* fix tests
---------
Signed-off-by: StefanVukovic99 <stefanvukovic44@gmail.com>
Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com>
-rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 31 | ||||
-rw-r--r-- | ext/js/language/ja/japanese.js | 32 | ||||
-rw-r--r-- | ext/js/language/language-descriptors.js | 12 | ||||
-rw-r--r-- | test/data/anki-note-builder-test-results.json | 94 | ||||
-rw-r--r-- | test/data/database-test-cases.json | 6 | ||||
-rw-r--r-- | test/data/dictionaries/valid-dictionary1/term_bank_1.json | 3 | ||||
-rw-r--r-- | test/data/translator-test-inputs.json | 28 | ||||
-rw-r--r-- | test/data/translator-test-results-note-data1.json | 234 | ||||
-rw-r--r-- | test/data/translator-test-results.json | 168 | ||||
-rw-r--r-- | test/japanese-util.test.js | 6 | ||||
-rw-r--r-- | types/ext/language-descriptors.d.ts | 4 | ||||
-rw-r--r-- | types/ext/settings.d.ts | 2 |
12 files changed, 585 insertions, 35 deletions
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js index b3d50817..32e45c83 100644 --- a/ext/js/language/ja/japanese-text-preprocessors.js +++ b/ext/js/language/ja/japanese-text-preprocessors.js @@ -19,10 +19,11 @@ import {basicTextProcessorOptions} from '../text-processors.js'; import {convertAlphabeticToKana} from './japanese-wanakana.js'; import { collapseEmphaticSequences as collapseEmphaticSequencesFunction, + convertAlphanumericToFullWidth, + convertFullWidthAlphanumericToNormal, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana as convertHiraganaToKatakanaFunction, - convertKatakanaToHiragana as convertKatakanaToHiraganaFunction, - convertNumericToFullWidth + convertKatakanaToHiragana as convertKatakanaToHiraganaFunction } from './japanese.js'; /** @type {import('language').TextProcessor<boolean>} */ @@ -33,16 +34,9 @@ export const convertHalfWidthCharacters = { process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str) }; -/** @type {import('language').TextProcessor<boolean>} */ -export const convertNumericCharacters = { - name: 'Convert numeric characters to full width', - description: '1234 → １２３４', - options: basicTextProcessorOptions, - process: (str, setting) => (setting ? 
convertNumericToFullWidth(str) : str) -}; /** @type {import('language').TextProcessor<boolean>} */ -export const convertAlphabeticCharacters = { +export const alphabeticToHiragana = { name: 'Convert alphabetic characters to hiragana', description: 'yomichan → よみちゃん', options: basicTextProcessorOptions, @@ -50,6 +44,23 @@ export const convertAlphabeticCharacters = { }; /** @type {import('language').BidirectionalConversionPreprocessor} */ +export const alphanumericWidthVariants = { + name: 'Convert between alphabetic width variants', + description: 'ｙｏｍｉｔａｎ → yomitan and vice versa', + options: ['off', 'direct', 'inverse'], + process: (str, setting) => { + switch (setting) { + case 'off': + return str; + case 'direct': + return convertFullWidthAlphanumericToNormal(str); + case 'inverse': + return convertAlphanumericToFullWidth(str); + } + } +}; + +/** @type {import('language').BidirectionalConversionPreprocessor} */ export const convertHiraganaToKatakana = { name: 'Convert hiragana to katakana', description: 'よみちゃん → ヨミチャン and vice versa', diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js index 3507e5df..2200e077 100644 --- a/ext/js/language/ja/japanese.js +++ b/ext/js/language/ja/japanese.js @@ -15,6 +15,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; @@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) { * @param {string} text * @returns {string} */ -export function convertNumericToFullWidth(text) { +export function convertAlphanumericToFullWidth(text) { let result = ''; for (const char of text) { let c = /** @type {number} */ (char.codePointAt(0)); if (c >= 0x30 && c <= 0x39) { // ['0', '9'] c += 0xff10 - 0x30; // 0xff10 = '0' full width - result += String.fromCodePoint(c); - } else { - result += char; + } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] + c += 0xff21 - 0x41; // 0xff21 = 'A' full width + } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] + c += 0xff41 - 0x61; // 0xff41 = 'a' full width + } + result += String.fromCodePoint(c); + } + return result; +} + +/** + * @param {string} text + * @returns {string} + */ +export function convertFullWidthAlphanumericToNormal(text) { + let result = ''; + const length = text.length; + for (let i = 0; i < length; i++) { + let c = /** @type {number} */ (text[i].codePointAt(0)); + if (c >= 0xff10 && c <= 0xff19) { // ['0', '9'] + c -= 0xff10 - 0x30; // 0x30 = '0' + } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] + c -= 0xff21 - 0x41; // 0x41 = 'A' + } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] + c -= 0xff41 - 0x61; // 0x61 = 'a' } + result += String.fromCodePoint(c); } return result; } diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 726842f1..baf53f81 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; import {eszettPreprocessor} from './de/german-text-preprocessors.js'; import {germanTransforms} from './de/german-transforms.js'; import {englishTransforms} from './en/english-transforms.js'; 
-import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import { + alphabeticToHiragana, + alphanumericWidthVariants, + collapseEmphaticSequences, + convertHalfWidthCharacters, + convertHiraganaToKatakana +} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js'; @@ -143,8 +149,8 @@ const languageDescriptors = [ isTextLookupWorthy: isStringPartiallyJapanese, textPreprocessors: { convertHalfWidthCharacters, - convertNumericCharacters, - convertAlphabeticCharacters, + alphabeticToHiragana, + alphanumericWidthVariants, convertHiraganaToKatakana, collapseEmphaticSequences }, diff --git a/test/data/anki-note-builder-test-results.json b/test/data/anki-note-builder-test-results.json index 1ec8d4cd..b4badf10 100644 --- a/test/data/anki-note-builder-test-results.json +++ b/test/data/anki-note-builder-test-results.json @@ -3874,7 +3874,99 @@ ] }, { - "name": "Test text preprocessors - convertAlphabeticCharacters", + "name": "Test text preprocessors - alphabeticWidthVariants 1", + "results": [ + { + "audio": "", + "clipboard-image": "", + "clipboard-text": "", + "cloze-body": "English", + "cloze-body-kana": "English", + "cloze-prefix": "cloze-prefix", + "cloze-suffix": "cloze-suffix", + "conjugation": "", + "dictionary": "Test Dictionary 2", + "document-title": "title", + "expression": "English", + "frequencies": "", + "frequency-harmonic-rank": "9999999", + "frequency-harmonic-occurrence": "0", + "frequency-average-rank": "9999999", + "frequency-average-occurrence": "0", + "furigana": "English", + "furigana-plain": "English", + "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>", + "glossary-brief": 
"<div style=\"text-align: left;\">English definition</div>", + "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>", + "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> English definition</div>", + "glossary-first-brief": "<div style=\"text-align: left;\">English definition</div>", + "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> English definition</div>", + "part-of-speech": "Noun", + "pitch-accents": "", + "pitch-accent-graphs": "", + "pitch-accent-graphs-jj": "", + "pitch-accent-positions": "", + "pitch-accent-categories": "", + "phonetic-transcriptions": "", + "reading": "English", + "screenshot": "", + "search-query": "fullQuery", + "selection-text": "", + "sentence": "cloze-prefixEnglishcloze-suffix", + "sentence-furigana": "cloze-prefixEnglishcloze-suffix", + "tags": "n", + "url": "<a href=\"url:\">url:</a>" + } + ] + }, + { + "name": "Test text preprocessors - alphabeticWidthVariants 2", + "results": [ + { + "audio": "", + "clipboard-image": "", + "clipboard-text": "", + "cloze-body": "USB", + "cloze-body-kana": "USB", + "cloze-prefix": "cloze-prefix", + "cloze-suffix": "cloze-suffix", + "conjugation": "", + "dictionary": "Test Dictionary 2", + "document-title": "title", + "expression": "USB", + "frequencies": "", + "frequency-harmonic-rank": "9999999", + "frequency-harmonic-occurrence": "0", + "frequency-average-rank": "9999999", + "frequency-average-occurrence": "0", + "furigana": "<ruby>USB<rt>ユーエスビー</rt></ruby>", + "furigana-plain": "USB[ユーエスビー]", + "glossary": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>", + "glossary-brief": "<div style=\"text-align: left;\">USB definition</div>", + "glossary-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>", + "glossary-first": "<div style=\"text-align: left;\"><i>(n, Test Dictionary 2)</i> USB definition</div>", + "glossary-first-brief": 
"<div style=\"text-align: left;\">USB definition</div>", + "glossary-first-no-dictionary": "<div style=\"text-align: left;\"><i>(n)</i> USB definition</div>", + "part-of-speech": "Noun", + "pitch-accents": "", + "pitch-accent-graphs": "", + "pitch-accent-graphs-jj": "", + "pitch-accent-positions": "", + "pitch-accent-categories": "", + "phonetic-transcriptions": "", + "reading": "ユーエスビー", + "screenshot": "", + "search-query": "fullQuery", + "selection-text": "", + "sentence": "cloze-prefixUSBcloze-suffix", + "sentence-furigana": "cloze-prefixUSBcloze-suffix", + "tags": "n", + "url": "<a href=\"url:\">url:</a>" + } + ] + }, + { + "name": "Test text preprocessors - alphabeticToHiragana", "results": [ { "audio": "", diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 054ede34..04b98aa9 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -27,7 +27,7 @@ "ipa": 1 }, "terms": { - "total": 30 + "total": 31 } } }, @@ -36,7 +36,7 @@ { "kanji": 2, "kanjiMeta": 6, - "terms": 30, + "terms": 31, "termMeta": 39, "tagMeta": 15, "media": 6 @@ -45,7 +45,7 @@ "total": { "kanji": 2, "kanjiMeta": 6, - "terms": 30, + "terms": 31, "termMeta": 39, "tagMeta": 15, "media": 6 diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json index a362de23..0a635b84 100644 --- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json @@ -344,5 +344,6 @@ ["凄い", "すごい", "adj-i", "adj-i", 1, ["sugoi definition"], 18, ""], ["English", "", "n", "n", 1, ["English definition"], 19, ""], ["language", "", "n", "n", 1, ["language definition"], 20, ""], - ["마시다", "", "v", "v", 1, ["masida definition"], 21, ""] + ["USB", "ユーエスビー", "n", "n", 1, ["USB definition"], 21, ""], + ["마시다", "", "v", "v", 1, ["masida definition"], 22, ""] ] diff --git a/test/data/translator-test-inputs.json 
b/test/data/translator-test-inputs.json index 9f508a1d..ed028273 100644 --- a/test/data/translator-test-inputs.json +++ b/test/data/translator-test-inputs.json @@ -369,7 +369,33 @@ ] }, { - "name": "Test text preprocessors - convertAlphabeticCharacters", + "name": "Test text preprocessors - alphabeticWidthVariants 1", + "func": "findTerms", + "mode": "split", + "text": "English", + "options": [ + "default", + { + "type": "terms", + "removeNonJapaneseCharacters": false + } + ] + }, + { + "name": "Test text preprocessors - alphabeticWidthVariants 2", + "func": "findTerms", + "mode": "split", + "text": "USB", + "options": [ + "default", + { + "type": "terms", + "removeNonJapaneseCharacters": false + } + ] + }, + { + "name": "Test text preprocessors - alphabeticToHiragana", "func": "findTerms", "mode": "split", "text": "utsu", diff --git a/test/data/translator-test-results-note-data1.json b/test/data/translator-test-results-note-data1.json index 0782c8f5..11c9d078 100644 --- a/test/data/translator-test-results-note-data1.json +++ b/test/data/translator-test-results-note-data1.json @@ -30533,7 +30533,235 @@ ] }, { - "name": "Test text preprocessors - convertAlphabeticCharacters", + "name": "Test text preprocessors - alphabeticWidthVariants 1", + "noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 27, + "source": "English", + "rawSource": "English", + "sourceTerm": "English", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 19, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "English", + "reading": "English", + "expressions": [ + { + "sourceTerm": "English", + "expression": "English", + "reading": "English", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "English", + "furigana": 
"" + } + ], + "termFrequency": "normal", + "wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "English definition" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "English", + "furigana": "" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "English" + ], + "uniqueReadings": [ + "English" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] + }, + { + "name": "Test text preprocessors - alphabeticWidthVariants 2", + "noteDataList": [ + { + "marker": "{marker}", + "definition": { + "type": "term", + "id": 29, + "source": "USB", + "rawSource": "USB", + "sourceTerm": "USB", + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "isPrimary": true, + "sequence": 21, + "dictionary": "Test Dictionary 2", + "dictionaryOrder": { + "index": 0, + "priority": 0 + }, + "dictionaryNames": [ + "Test Dictionary 2" + ], + "expression": "USB", + "reading": "ユーエスビー", + "expressions": [ + { + "sourceTerm": "USB", + "expression": "USB", + "reading": "ユーエスビー", + "termTags": [], + "frequencies": [], + "pitches": [], + "furiganaSegments": [ + { + "text": "USB", + "furigana": "ユーエスビー" + } + ], + "termFrequency": "normal", + 
"wordClasses": [ + "n" + ] + } + ], + "glossary": [ + "USB definition" + ], + "definitionTags": [ + { + "name": "n", + "category": "partOfSpeech", + "notes": "noun", + "order": 0, + "score": 0, + "dictionary": "Test Dictionary 2", + "redundant": false + } + ], + "termTags": [], + "frequencies": [], + "frequencyHarmonic": -1, + "frequencyAverage": -1, + "pitches": [], + "phoneticTranscriptions": [], + "sourceTermExactMatchCount": 1, + "url": "url:", + "cloze": { + "sentence": "", + "prefix": "", + "body": "", + "bodyKana": "", + "suffix": "" + }, + "furiganaSegments": [ + { + "text": "USB", + "furigana": "ユーエスビー" + } + ] + }, + "glossaryLayoutMode": "default", + "compactTags": false, + "group": false, + "merge": false, + "modeTermKanji": false, + "modeTermKana": false, + "modeKanji": false, + "compactGlossaries": false, + "uniqueExpressions": [ + "USB" + ], + "uniqueReadings": [ + "ユーエスビー" + ], + "pitches": [], + "pitchCount": 0, + "phoneticTranscriptions": [], + "context": { + "query": "query", + "fullQuery": "fullQuery", + "document": { + "title": "title" + } + }, + "media": {} + } + ] + }, + { + "name": "Test text preprocessors - alphabeticToHiragana", "noteDataList": [ { "marker": "{marker}", @@ -32948,7 +33176,7 @@ "marker": "{marker}", "definition": { "type": "term", - "id": 29, + "id": 30, "source": "ㅁㅏㅅㅣㄱㅓㄴㅏ", "rawSource": "마시거나", "sourceTerm": "마시다", @@ -32962,7 +33190,7 @@ ], "score": 1, "isPrimary": true, - "sequence": 21, + "sequence": 22, "dictionary": "Test Dictionary 2", "dictionaryOrder": { "index": 0, diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json index bc26e0f9..ba251347 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -17211,7 +17211,169 @@ ] }, { - "name": "Test text preprocessors - convertAlphabeticCharacters", + "name": "Test text preprocessors - alphabeticWidthVariants 1", + "originalTextLength": 7, + "dictionaryEntries": [ + { + "type": "term", + 
"isPrimary": true, + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "frequencyOrder": 0, + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "sourceTermExactMatchCount": 1, + "maxOriginalTextLength": 7, + "headwords": [ + { + "index": 0, + "term": "English", + "reading": "English", + "sources": [ + { + "originalText": "English", + "transformedText": "English", + "deinflectedText": "English", + "matchType": "exact", + "matchSource": "term", + "isPrimary": true + } + ], + "tags": [], + "wordClasses": [ + "n" + ] + } + ], + "definitions": [ + { + "index": 0, + "headwordIndices": [ + 0 + ], + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "id": 27, + "score": 1, + "frequencyOrder": 0, + "sequences": [ + 19 + ], + "isPrimary": true, + "tags": [ + { + "name": "n", + "category": "partOfSpeech", + "order": 0, + "score": 0, + "content": [ + "noun" + ], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "entries": [ + "English definition" + ] + } + ], + "pronunciations": [], + "frequencies": [] + } + ] + }, + { + "name": "Test text preprocessors - alphabeticWidthVariants 2", + "originalTextLength": 3, + "dictionaryEntries": [ + { + "type": "term", + "isPrimary": true, + "inflectionRuleChainCandidates": [ + { + "source": "algorithm", + "inflectionRules": [] + } + ], + "score": 1, + "frequencyOrder": 0, + "dictionaryIndex": 0, + "dictionaryPriority": 0, + "sourceTermExactMatchCount": 1, + "maxOriginalTextLength": 3, + "headwords": [ + { + "index": 0, + "term": "USB", + "reading": "ユーエスビー", + "sources": [ + { + "originalText": "USB", + "transformedText": "USB", + "deinflectedText": "USB", + "matchType": "exact", + "matchSource": "term", + "isPrimary": true + } + ], + "tags": [], + "wordClasses": [ + "n" + ] + } + ], + "definitions": [ + { + "index": 0, + "headwordIndices": [ + 0 + ], + "dictionary": "Test Dictionary 2", + "dictionaryIndex": 0, 
+ "dictionaryPriority": 0, + "id": 29, + "score": 1, + "frequencyOrder": 0, + "sequences": [ + 21 + ], + "isPrimary": true, + "tags": [ + { + "name": "n", + "category": "partOfSpeech", + "order": 0, + "score": 0, + "content": [ + "noun" + ], + "dictionaries": [ + "Test Dictionary 2" + ], + "redundant": false + } + ], + "entries": [ + "USB definition" + ] + } + ], + "pronunciations": [], + "frequencies": [] + } + ] + }, + { + "name": "Test text preprocessors - alphabeticToHiragana", "originalTextLength": 4, "dictionaryEntries": [ { @@ -18656,11 +18818,11 @@ "dictionary": "Test Dictionary 2", "dictionaryIndex": 0, "dictionaryPriority": 0, - "id": 29, + "id": 30, "score": 1, "frequencyOrder": 0, "sequences": [ - 21 + 22 ], "isPrimary": true, "tags": [ diff --git a/test/japanese-util.test.js b/test/japanese-util.test.js index bff51f85..63ac5cdf 100644 --- a/test/japanese-util.test.js +++ b/test/japanese-util.test.js @@ -178,17 +178,17 @@ describe('Japanese utility functions', () => { }); }); - describe('convertNumericToFullWidth', () => { + describe('convertAlphanumericToFullWidth', () => { /** @type {[string: string, expected: string][]} */ const data = [ ['0123456789', '0123456789'], - ['abcdefghij', 'abcdefghij'], + ['abcdefghij', 'abcdefghij'], ['カタカナ', 'カタカナ'], ['ひらがな', 'ひらがな'] ]; test.each(data)('%s -> %o', (string, expected) => { - expect(jp.convertNumericToFullWidth(string)).toStrictEqual(expected); + expect(jp.convertAlphanumericToFullWidth(string)).toStrictEqual(expected); }); }); diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index aa961a6c..69ccec44 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -118,8 +118,8 @@ type AllTextProcessors = { ja: { pre: { convertHalfWidthCharacters: TextProcessor<boolean>; - convertNumericCharacters: TextProcessor<boolean>; - convertAlphabeticCharacters: TextProcessor<boolean>; + alphabeticToHiragana: TextProcessor<boolean>; + 
alphanumericWidthVariants: BidirectionalConversionPreprocessor; convertHiraganaToKatakana: BidirectionalConversionPreprocessor; collapseEmphaticSequences: TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>; }; diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts index 48a66728..b69679b0 100644 --- a/types/ext/settings.d.ts +++ b/types/ext/settings.d.ts @@ -231,7 +231,7 @@ export type ScanningPreventMiddleMouseOptions = { export type TranslationOptions = { convertHalfWidthCharacters: TranslationConvertType; convertNumericCharacters: TranslationConvertType; - convertAlphabeticCharacters: TranslationConvertType; + alphabeticToHiragana: TranslationConvertType; convertHiraganaToKatakana: TranslationConvertType; convertKatakanaToHiragana: TranslationConvertType; collapseEmphaticSequences: TranslationCollapseEmphaticSequences; |