diff options
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/js/language/ja/japanese-text-preprocessors.js | 31 |
| -rw-r--r-- | ext/js/language/ja/japanese.js | 32 |
| -rw-r--r-- | ext/js/language/language-descriptors.js | 12 |
3 files changed, 58 insertions, 17 deletions
| diff --git a/ext/js/language/ja/japanese-text-preprocessors.js b/ext/js/language/ja/japanese-text-preprocessors.js index b3d50817..32e45c83 100644 --- a/ext/js/language/ja/japanese-text-preprocessors.js +++ b/ext/js/language/ja/japanese-text-preprocessors.js @@ -19,10 +19,11 @@ import {basicTextProcessorOptions} from '../text-processors.js';  import {convertAlphabeticToKana} from './japanese-wanakana.js';  import {      collapseEmphaticSequences as collapseEmphaticSequencesFunction, +    convertAlphanumericToFullWidth, +    convertFullWidthAlphanumericToNormal,      convertHalfWidthKanaToFullWidth,      convertHiraganaToKatakana as convertHiraganaToKatakanaFunction, -    convertKatakanaToHiragana as convertKatakanaToHiraganaFunction, -    convertNumericToFullWidth +    convertKatakanaToHiragana as convertKatakanaToHiraganaFunction  } from './japanese.js';  /** @type {import('language').TextProcessor<boolean>} */ @@ -33,16 +34,9 @@ export const convertHalfWidthCharacters = {      process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str)  }; -/** @type {import('language').TextProcessor<boolean>} */ -export const convertNumericCharacters = { -    name: 'Convert numeric characters to full width', -    description: '1234 → 1234', -    options: basicTextProcessorOptions, -    process: (str, setting) => (setting ? 
convertNumericToFullWidth(str) : str) -};  /** @type {import('language').TextProcessor<boolean>} */ -export const convertAlphabeticCharacters = { +export const alphabeticToHiragana = {      name: 'Convert alphabetic characters to hiragana',      description: 'yomichan → よみちゃん',      options: basicTextProcessorOptions, @@ -50,6 +44,23 @@ export const convertAlphabeticCharacters = {  };  /** @type {import('language').BidirectionalConversionPreprocessor} */ +export const alphanumericWidthVariants = { +    name: 'Convert between alphabetic width variants', +    description: 'yomitan → yomitan and vice versa', +    options: ['off', 'direct', 'inverse'], +    process: (str, setting) => { +        switch (setting) { +            case 'off': +                return str; +            case 'direct': +                return convertFullWidthAlphanumericToNormal(str); +            case 'inverse': +                return convertAlphanumericToFullWidth(str); +        } +    } +}; + +/** @type {import('language').BidirectionalConversionPreprocessor} */  export const convertHiraganaToKatakana = {      name: 'Convert hiragana to katakana',      description: 'よみちゃん → ヨミチャン and vice versa', diff --git a/ext/js/language/ja/japanese.js b/ext/js/language/ja/japanese.js index 3507e5df..2200e077 100644 --- a/ext/js/language/ja/japanese.js +++ b/ext/js/language/ja/japanese.js @@ -15,6 +15,7 @@   * along with this program.  If not, see <https://www.gnu.org/licenses/>.   
*/ +  const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;  const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;  const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; @@ -523,16 +524,39 @@ export function convertHiraganaToKatakana(text) {   * @param {string} text   * @returns {string}   */ -export function convertNumericToFullWidth(text) { +export function convertAlphanumericToFullWidth(text) {      let result = '';      for (const char of text) {          let c = /** @type {number} */ (char.codePointAt(0));          if (c >= 0x30 && c <= 0x39) { // ['0', '9']              c += 0xff10 - 0x30; // 0xff10 = '0' full width -            result += String.fromCodePoint(c); -        } else { -            result += char; +        } else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z'] +            c += 0xff21 - 0x41; // 0xff21 = 'A' full width +        } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z'] +            c += 0xff41 - 0x61; // 0xff41 = 'a' full width +        } +        result += String.fromCodePoint(c); +    } +    return result; +} + +/** + * @param {string} text + * @returns {string} + */ +export function convertFullWidthAlphanumericToNormal(text) { +    let result = ''; +    const length = text.length; +    for (let i = 0; i < length; i++) { +        let c = /** @type {number} */ (text[i].codePointAt(0)); +        if (c >= 0xff10 && c <= 0xff19) { // ['0', '9'] +            c -= 0xff10 - 0x30; // 0x30 = '0' +        } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] +            c -= 0xff21 - 0x41; // 0x41 = 'A' +        } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] +            c -= 0xff41 - 0x61; // 0x61 = 'a'          } +        result += String.fromCodePoint(c);      }      return result;  } diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 726842f1..baf53f81 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -19,7 +19,13 @@ import {removeArabicScriptDiacritics} from 
'./ar/arabic-text-preprocessors.js';  import {eszettPreprocessor} from './de/german-text-preprocessors.js';  import {germanTransforms} from './de/german-transforms.js';  import {englishTransforms} from './en/english-transforms.js'; -import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import { +    alphabeticToHiragana, +    alphanumericWidthVariants, +    collapseEmphaticSequences, +    convertHalfWidthCharacters, +    convertHiraganaToKatakana +} from './ja/japanese-text-preprocessors.js';  import {japaneseTransforms} from './ja/japanese-transforms.js';  import {isStringPartiallyJapanese} from './ja/japanese.js';  import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js'; @@ -143,8 +149,8 @@ const languageDescriptors = [          isTextLookupWorthy: isStringPartiallyJapanese,          textPreprocessors: {              convertHalfWidthCharacters, -            convertNumericCharacters, -            convertAlphabeticCharacters, +            alphabeticToHiragana, +            alphanumericWidthVariants,              convertHiraganaToKatakana,              collapseEmphaticSequences          }, |