diff options
Diffstat (limited to 'ext/js')
-rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 31 | ||||
-rw-r--r-- | ext/js/language/ja/japanese.js | 32 | ||||
-rw-r--r-- | ext/js/language/language-descriptors.js | 12 |
3 files changed, 58 insertions, 17 deletions
diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js index b3d50817..32e45c83 100644 --- a/ext/js/language/ja/japanese-text-preprocessors.js +++ b/ext/js/language/ja/japanese-text-preprocessors.js @@ -19,10 +19,11 @@ import {basicTextProcessorOptions} from '../text-processors.js'; import {convertAlphabeticToKana} from './japanese-wanakana.js'; import { collapseEmphaticSequences as collapseEmphaticSequencesFunction, + convertAlphanumericToFullWidth, + convertFullWidthAlphanumericToNormal, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana as convertHiraganaToKatakanaFunction, - convertKatakanaToHiragana as convertKatakanaToHiraganaFunction, - convertNumericToFullWidth + convertKatakanaToHiragana as convertKatakanaToHiraganaFunction } from './japanese.js'; /** @type {import('language').TextProcessor<boolean>} */ @@ -33,16 +34,9 @@ export const convertHalfWidthCharacters = { process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str) }; -/** @type {import('language').TextProcessor<boolean>} */ -export const convertNumericCharacters = { - name: 'Convert numeric characters to full width', - description: '1234 → 1234', - options: basicTextProcessorOptions, - process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str) -}; /** @type {import('language').TextProcessor<boolean>} */ -export const convertAlphabeticCharacters = { +export const alphabeticToHiragana = { name: 'Convert alphabetic characters to hiragana', description: 'yomichan → よみちゃん', options: basicTextProcessorOptions, @@ -50,6 +44,23 @@ export const convertAlphabeticCharacters = { }; /** @type {import('language').BidirectionalConversionPreprocessor} */ +export const alphanumericWidthVariants = { + name: 'Convert between alphabetic width variants', + description: 'yomitan → yomitan and vice versa', + options: ['off', 'direct', 'inverse'], + process: (str, setting) => { + switch (setting) { + case 'off': + return str; + case 'direct': + return convertFullWidthAlphanumericToNormal(str); + case 'inverse': + return convertAlphanumericToFullWidth(str); + } + } +}; + +/** @type {import('language').BidirectionalConversionPreprocessor} */ export const convertHiraganaToKatakana = { name: 'Convert hiragana to katakana', description: 'よみちゃん → ヨミチャン and vice versa', diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js index 3507e5df..2200e077 100644 --- a/ext/js/language/ja/japanese.js +++ b/ext/js/language/ja/japanese.js @@ -15,6 +15,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; @@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) { * @param {string} text * @returns {string} */ -export function convertNumericToFullWidth(text) { +export function convertAlphanumericToFullWidth(text) { let result = ''; for (const char of text) { let c = /** @type {number} */ (char.codePointAt(0)); if (c >= 0x30 && c <= 0x39) { // ['0', '9'] c += 0xff10 - 0x30; // 0xff10 = '0' full width - result += String.fromCodePoint(c); - } else { - result += char; + } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] + c += 0xff21 - 0x41; // 0xff21 = 'A' full width + } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] + c += 0xff41 - 0x61; // 0xff41 = 'a' full width + } + result += String.fromCodePoint(c); + } + return result; +} + +/** + * @param {string} text + * @returns {string} + */ +export function convertFullWidthAlphanumericToNormal(text) { + let result = ''; + const length = text.length; + for (let i = 0; i < length; i++) { + let c = /** @type {number} */ (text[i].codePointAt(0)); + if (c >= 0xff10 && c <= 0xff19) { // ['0', '9'] + c -= 0xff10 - 0x30; // 0x30 = '0' + } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] + c -= 0xff21 - 0x41; // 0x41 = 'A' + } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] + c -= 0xff41 - 0x61; // 0x61 = 'a' } + result += String.fromCodePoint(c); } return result; } diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 726842f1..baf53f81 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; import {eszettPreprocessor} from './de/german-text-preprocessors.js'; import {germanTransforms} from './de/german-transforms.js'; import {englishTransforms} from './en/english-transforms.js'; -import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import { + alphabeticToHiragana, + alphanumericWidthVariants, + collapseEmphaticSequences, + convertHalfWidthCharacters, + convertHiraganaToKatakana +} from './ja/japanese-text-preprocessors.js'; import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js'; @@ -143,8 +149,8 @@ const languageDescriptors = [ isTextLookupWorthy: isStringPartiallyJapanese, textPreprocessors: { convertHalfWidthCharacters, - convertNumericCharacters, - convertAlphabeticCharacters, + alphabeticToHiragana, + alphanumericWidthVariants, convertHiraganaToKatakana, collapseEmphaticSequences }, |