diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-03-21 13:18:34 -0400 | 
|---|---|---|
| committer | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2020-03-21 13:18:34 -0400 | 
| commit | 77a2cc60e9a4a89da354cadb1bf060204ee3b951 (patch) | |
| tree | 32c35aaee08a247e95e65e8459c779c4837d0f62 /ext | |
| parent | 487d4b239b88fd57fefb0ec3a9d1bd0d25984660 (diff) | |
Move basic string/character testing functions into mixed/js/japanese.js
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/bg/background.html | 1 | ||||
| -rw-r--r-- | ext/bg/js/japanese.js | 106 | ||||
| -rw-r--r-- | ext/bg/search.html | 1 | ||||
| -rw-r--r-- | ext/bg/settings.html | 1 | ||||
| -rw-r--r-- | ext/mixed/js/japanese.js | 124 | 
5 files changed, 134 insertions, 99 deletions
| diff --git a/ext/bg/background.html b/ext/bg/background.html index 44abe8fd..f7cf6e55 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -20,6 +20,7 @@          <script src="/mixed/js/core.js"></script>          <script src="/mixed/js/dom.js"></script> +        <script src="/mixed/js/japanese.js"></script>          <script src="/bg/js/anki.js"></script>          <script src="/bg/js/anki-note-builder.js"></script> diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index d2a577e6..c5873cf1 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -17,10 +17,11 @@   */  /* global + * jp   * wanakana   */ -const jp = (() => { +(() => {      const HALFWIDTH_KATAKANA_MAPPING = new Map([          ['ヲ', 'ヲヺ-'],          ['ァ', 'ァ--'], @@ -80,101 +81,13 @@ const jp = (() => {          ['ン', 'ン--']      ]); -    const HIRAGANA_RANGE = [0x3040, 0x309f]; -    const KATAKANA_RANGE = [0x30a0, 0x30ff]; -    const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; - -    const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; -    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; -    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; -    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; -    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; -    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; -    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; -    const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; -    const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ -        CJK_UNIFIED_IDEOGRAPHS_RANGE, -        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, -        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, -        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, -        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, -        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, -        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, -        
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE -    ]; -      const ITERATION_MARK_CODE_POINT = 0x3005; -    // Japanese character ranges, roughly ordered in order of expected frequency -    const JAPANESE_RANGES = [ -        HIRAGANA_RANGE, -        KATAKANA_RANGE, - -        ...CJK_UNIFIED_IDEOGRAPHS_RANGES, - -        [0xff66, 0xff9f], // Halfwidth katakana - -        [0x30fb, 0x30fc], // Katakana punctuation -        [0xff61, 0xff65], // Kana punctuation -        [0x3000, 0x303f], // CJK punctuation - -        [0xff10, 0xff19], // Fullwidth numbers -        [0xff21, 0xff3a], // Fullwidth upper case Latin letters -        [0xff41, 0xff5a], // Fullwidth lower case Latin letters - -        [0xff01, 0xff0f], // Fullwidth punctuation 1 -        [0xff1a, 0xff1f], // Fullwidth punctuation 2 -        [0xff3b, 0xff3f], // Fullwidth punctuation 3 -        [0xff5b, 0xff60], // Fullwidth punctuation 4 -        [0xffe0, 0xffee]  // Currency markers -    ]; - - -    // Character code testing functions - -    function isCodePointKanji(codePoint) { -        return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); -    } - -    function isCodePointKana(codePoint) { -        return isCodePointInRanges(codePoint, KANA_RANGES); -    } - -    function isCodePointJapanese(codePoint) { -        return isCodePointInRanges(codePoint, JAPANESE_RANGES); -    } -    function isCodePointInRanges(codePoint, ranges) { -        for (const [min, max] of ranges) { -            if (codePoint >= min && codePoint <= max) { -                return true; -            } -        } -        return false; -    } +    // Existing functions - -    // String testing functions - -    function isStringEntirelyKana(str) { -        if (str.length === 0) { return false; } -        for (const c of str) { -            if (!isCodePointKana(c.codePointAt(0))) { -                return false; -            } -        } -        return true; -    } - -    function isStringPartiallyJapanese(str) { -        
if (str.length === 0) { return false; } -        for (const c of str) { -            if (isCodePointJapanese(c.codePointAt(0))) { -                return true; -            } -        } -        return false; -    } +    const isCodePointKanji = jp.isCodePointKanji; +    const isStringEntirelyKana = jp.isStringEntirelyKana;      // Conversion functions @@ -469,12 +382,7 @@ const jp = (() => {      // Exports -    return { -        isCodePointKanji, -        isCodePointKana, -        isCodePointJapanese, -        isStringEntirelyKana, -        isStringPartiallyJapanese, +    Object.assign(jp, {          convertKatakanaToHiragana,          convertHiraganaToKatakana,          convertToRomaji, @@ -484,5 +392,5 @@ const jp = (() => {          convertAlphabeticToKana,          distributeFurigana,          distributeFuriganaInflected -    }; +    });  })(); diff --git a/ext/bg/search.html b/ext/bg/search.html index f4c1a737..eacc1893 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -74,6 +74,7 @@          <script src="/mixed/js/core.js"></script>          <script src="/mixed/js/dom.js"></script>          <script src="/mixed/js/api.js"></script> +        <script src="/mixed/js/japanese.js"></script>          <script src="/bg/js/dictionary.js"></script>          <script src="/bg/js/handlebars.js"></script> diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 0db76d71..cfe20be4 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -1088,6 +1088,7 @@          <script src="/mixed/js/core.js"></script>          <script src="/mixed/js/dom.js"></script>          <script src="/mixed/js/api.js"></script> +        <script src="/mixed/js/japanese.js"></script>          <script src="/bg/js/anki.js"></script>          <script src="/bg/js/anki-note-builder.js"></script> diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js new file mode 100644 index 00000000..61a247b2 --- /dev/null +++ b/ext/mixed/js/japanese.js @@ -0,0 +1,124 @@ +/* + * 
Copyright (C) 2020  Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <https://www.gnu.org/licenses/>. + */ + +const jp = (() => { +    const HIRAGANA_RANGE = [0x3040, 0x309f]; +    const KATAKANA_RANGE = [0x30a0, 0x30ff]; +    const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE]; + +    const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff]; +    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf]; +    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df]; +    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f]; +    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f]; +    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf]; +    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef]; +    const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f]; +    const CJK_UNIFIED_IDEOGRAPHS_RANGES = [ +        CJK_UNIFIED_IDEOGRAPHS_RANGE, +        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE, +        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE, +        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE, +        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE, +        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE, +        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE, +        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE +    ]; + +    // Japanese 
character ranges, roughly ordered in order of expected frequency +    const JAPANESE_RANGES = [ +        HIRAGANA_RANGE, +        KATAKANA_RANGE, + +        ...CJK_UNIFIED_IDEOGRAPHS_RANGES, + +        [0xff66, 0xff9f], // Halfwidth katakana + +        [0x30fb, 0x30fc], // Katakana punctuation +        [0xff61, 0xff65], // Kana punctuation +        [0x3000, 0x303f], // CJK punctuation + +        [0xff10, 0xff19], // Fullwidth numbers +        [0xff21, 0xff3a], // Fullwidth upper case Latin letters +        [0xff41, 0xff5a], // Fullwidth lower case Latin letters + +        [0xff01, 0xff0f], // Fullwidth punctuation 1 +        [0xff1a, 0xff1f], // Fullwidth punctuation 2 +        [0xff3b, 0xff3f], // Fullwidth punctuation 3 +        [0xff5b, 0xff60], // Fullwidth punctuation 4 +        [0xffe0, 0xffee]  // Currency markers +    ]; + + +    // Character code testing functions + +    function isCodePointKanji(codePoint) { +        return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES); +    } + +    function isCodePointKana(codePoint) { +        return isCodePointInRanges(codePoint, KANA_RANGES); +    } + +    function isCodePointJapanese(codePoint) { +        return isCodePointInRanges(codePoint, JAPANESE_RANGES); +    } + +    function isCodePointInRanges(codePoint, ranges) { +        for (const [min, max] of ranges) { +            if (codePoint >= min && codePoint <= max) { +                return true; +            } +        } +        return false; +    } + + +    // String testing functions + +    function isStringEntirelyKana(str) { +        if (str.length === 0) { return false; } +        for (const c of str) { +            if (!isCodePointKana(c.codePointAt(0))) { +                return false; +            } +        } +        return true; +    } + +    function isStringPartiallyJapanese(str) { +        if (str.length === 0) { return false; } +        for (const c of str) { +            if (isCodePointJapanese(c.codePointAt(0))) { +            
    return true; +            } +        } +        return false; +    } + + +    // Exports + +    return { +        isCodePointKanji, +        isCodePointKana, +        isCodePointJapanese, +        isStringEntirelyKana, +        isStringPartiallyJapanese +    }; +})(); |