diff options
Diffstat (limited to 'util')
-rw-r--r-- | util/array.ts | 14 | ||||
-rw-r--r-- | util/japanese.ts | 133 | ||||
-rw-r--r-- | util/number.ts | 11 | ||||
-rw-r--r-- | util/string.ts | 48 |
4 files changed, 192 insertions, 14 deletions
diff --git a/util/array.ts b/util/array.ts index f032935..c5a26c6 100644 --- a/util/array.ts +++ b/util/array.ts @@ -1,3 +1,5 @@ +import "./set.ts"; + declare global { interface Array<T> { /** @summary check if any of the elements of `arr2` are included in `this` */ @@ -8,6 +10,10 @@ declare global { set(): Set<T>; /** @summary clear array */ clear(): void; + /** @summary filter duplicates from array */ + filterDuplicates(): Array<T>; + /** @summary `true` if the array doesn't contain duplicate items */ + isUniq(): boolean; } } @@ -27,3 +33,11 @@ Array.prototype.clear = function() { while (this.length > 0) this.pop(); } +Array.prototype.filterDuplicates = function() { + return this.set().arr(); // TODO: optimize this +} + +Array.prototype.isUniq = function() { + return this.length == this.filterDuplicates().length; +} + diff --git a/util/japanese.ts b/util/japanese.ts index 2017280..d398b60 100644 --- a/util/japanese.ts +++ b/util/japanese.ts @@ -1,4 +1,5 @@ import { UnicodeRange } from "./string.ts"; +import "./number.ts"; declare global { interface String { @@ -57,6 +58,15 @@ declare global { * `strict` to true */ japaneseOnly(strict?: boolean): boolean + + /** @summary convert any half-width katakana to full-width */ + widenKatakana(): string; + + /** @summary convert any full-width katakana to hiragana */ + katakanaToHiragana(): string; + + /** @summary convert any kana (full and half-width) to full-width hiragana */ + normalizeKana(): string; } } @@ -85,7 +95,7 @@ function stringOnly(input: string, check: (key: string, val: number) => StringOn String.prototype.hiraganaOnly = function(strict = false) { return stringOnly(this as string, (key, val) => { - if (key == UnicodeRange.JapaneseHiragana) + if (key == UnicodeRange.JapaneseFWHiragana) return StringOnlyReturnValue.TallyAdd; // count hiragana characters else if (!strict && key.startsWith("any-")) return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) @@ -97,7 +107,7 @@ String.prototype.hiraganaOnly = function(strict = false) { String.prototype.katakanaOnly = function(strict = false) { return stringOnly(this as string, (key, val) => { - if (key == UnicodeRange.JapaneseKatakana) + if ([UnicodeRange.JapaneseHWKatakana, UnicodeRange.JapaneseFWKatakana].includes(key as UnicodeRange)) return StringOnlyReturnValue.TallyAdd; // count katakana characters else if (!strict && key.startsWith("any-")) return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) @@ -121,7 +131,7 @@ String.prototype.kanjiOnly = function(strict = false) { String.prototype.kanaOnly = function(strict = false) { return stringOnly(this as string, (key, val) => { - if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana) + if ([UnicodeRange.JapaneseHWKatakana, UnicodeRange.JapaneseFWKatakana, UnicodeRange.JapaneseFWHiragana].includes(key as UnicodeRange)) return StringOnlyReturnValue.TallyAdd; // count kana characters else if (!strict && key.startsWith("any-")) return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) @@ -143,3 +153,120 @@ String.prototype.japaneseOnly = function(strict = false) { }); } +String.prototype.widenKatakana = function() { + const map: { [key: string]: string } = { + "ァ": "ァ", + "ア": "ア", + "ィ": "ィ", + "イ": "イ", + "ゥ": "ゥ", + "ウ": "ウ", + "ェ": "ェ", + "エ": "エ", + "ォ": "ォ", + "オ": "オ", + "ガ": "ガ", + "カ": "カ", + "ギ": "ギ", + "キ": "キ", + "グ": "グ", + "ク": "ク", + "ゲ": "ゲ", + "ケ": "ケ", + "ゴ": "ゴ", + "コ": "コ", + "ザ": "ザ", + "サ": "サ", + "ジ": "ジ", + "シ": "シ", + "ズ": "ズ", + "ス": "ス", + "ゼ": "ゼ", + "セ": "セ", + "ゾ": "ゾ", + "ソ": "ソ", + "ダ": "ダ", + "タ": "タ", + "ヂ": "ヂ", + "チ": "チ", + "ヅ": "ヅ", + "ッ": "ッ", + "ツ": "ツ", + "デ": "デ", + "テ": "テ", + "ド": "ド", + "ト": "ト", + "ナ": "ナ", + "ニ": "ニ", + "ヌ": "ヌ", + "ネ": "ネ", + "ノ": "ノ", + "バ": "バ", + "パ": "パ", + "ハ": "ハ", + "ビ": "ビ", + "ピ": "ピ", + "ヒ": "ヒ", + "ブ": "ブ", + "プ": "プ", + "フ": "フ", + "ベ": "ベ", + "ペ": "ペ", + "ヘ": "ヘ", + "ボ": "ボ", + "ポ": "ポ", + "ホ": "ホ", + "マ": "マ", + "ミ": "ミ", + "ム": "ム", + "メ": "メ", + "モ": "モ", + "ャ": "ャ", + "ヤ": "ヤ", + "ュ": "ュ", + "ユ": "ユ", + "ョ": "ョ", + "ヨ": "ヨ", + "ラ": "ラ", + "リ": "リ", + "ル": "ル", + "レ": "レ", + "ロ": "ロ", + "ワ": "ワ", + "ヲ": "ヲ", + "ン": "ン", + "ヴ": "ヴ", + "ヷ": "ヷ", + "イ゙": "イ゙", + "エ゙": "エ゙", + "ヺ": "ヺ", + "ー": "ー", + }; + + var out = ""; + outer: + for (let i = 0; i < this.length; i++) { + for (var key in map) { + if (!this.substring(i).startsWith(key)) continue; + out += map[key]; + i += key.length - 1; + continue outer; + } + + out += this[i]; + } + return out; +} + +String.prototype.katakanaToHiragana = function() { + return this.map(char => { + var code = char.codePointAt(0)!; + if (0x30a1 <= code && code <= 0x30f6) return (code + (0x3041 - 0x30a1)).toChar(); + return char; + }) +} + +String.prototype.normalizeKana = function() { + return this.widenKatakana().katakanaToHiragana(); +} + diff --git a/util/number.ts b/util/number.ts new file mode 100644 index 0000000..c28864f --- /dev/null +++ b/util/number.ts @@ -0,0 +1,11 @@ +declare global { + interface Number { + /** @summary convert number to character by charCode */ + toChar(): string; + } +} + +Number.prototype.toChar = function() { + return String.fromCharCode(this as number); +} + diff --git a/util/string.ts b/util/string.ts index 397dcd6..327b884 100644 --- a/util/string.ts +++ b/util/string.ts @@ -26,6 +26,12 @@ declare global { * @argument fallback return this value if parsing fails */ json(fallback?: any): any; + + /** + * @summary map each character of a string to another character using + * `mapFn` + */ + map(mapFn: (char: string) => string): string; } } @@ -34,11 +40,15 @@ export enum UnicodeRange { Whitespace = "any-whitespace", Punctuation = "any-punctuation", Unknown = "any-unknown", - JapanesePunctuation = "jp-punctuation", - JapaneseHiragana = "jp-hiragana", - JapaneseKatakana = "jp-katakana", - JapaneseFWLatinHWKatakana = "jp-full-width-latin-half-width-katakana", + JapaneseFWPunctuation = "jp-full-width-punctuation", + JapaneseHWPunctuation = "jp-half-width-punctuation", + JapaneseFWHiragana = "jp-full-width-hiragana", + JapaneseFWKatakana = "jp-full-width-katakana", + JapaneseFWLatin = "jp-full-width-latin", + JapaneseHWKatakana = "jp-half-width-katakana", JapaneseKanji = "jp-kanji", + JapaneseKanjiRadicals = "jp-kanji-radicals", + JapaneseAuxiliary = "jp-aux", } type RangeTally = Record<UnicodeRange, number>; @@ -52,12 +62,22 @@ String.prototype.range = function() { if (0x2e == code) return UnicodeRange.Punctuation; // full stop if (0x3f == code) return UnicodeRange.Punctuation; // question mark + // https://stackoverflow.com/a/53807563 if (0x0000 <= code && code <= 0x007f) return UnicodeRange.BasicLatin; - if (0x3000 <= code && code <= 0x303f) return UnicodeRange.JapanesePunctuation; - if (0x3040 <= code && code <= 0x309f) return UnicodeRange.JapaneseHiragana; - if (0x30a0 <= code && code <= 0x30ff) return UnicodeRange.JapaneseKatakana; - if (0xff00 <= code && code <= 0xffef) return UnicodeRange.JapaneseFWLatinHWKatakana; - if (0x4e00 <= code && code <= 0x9faf) return UnicodeRange.JapaneseKanji; + if (0x2e80 <= code && code <= 0x2fd5) return UnicodeRange.JapaneseKanjiRadicals; + if (0x3000 <= code && code <= 0x303f) return UnicodeRange.JapaneseFWPunctuation; + if (0xff5f <= code && code <= 0xff60) return UnicodeRange.JapaneseFWPunctuation; + if (0x3041 <= code && code <= 0x3096) return UnicodeRange.JapaneseFWHiragana; + if (0x30a1 <= code && code <= 0x30ff) return UnicodeRange.JapaneseFWKatakana; + if (0x3400 <= code && code <= 0x4db5) return UnicodeRange.JapaneseKanji; + if (0x4e00 <= code && code <= 0x9fcb) return UnicodeRange.JapaneseKanji; + if (0xf900 <= code && code <= 0xfa6a) return UnicodeRange.JapaneseKanji; + if (0xff61 <= code && code <= 0xff65) return UnicodeRange.JapaneseHWPunctuation; + if (0xff66 <= code && code <= 0xff9f) return UnicodeRange.JapaneseHWKatakana; + if (0x31f0 <= code && code <= 0x31ff) return UnicodeRange.JapaneseAuxiliary; + if (0x3220 <= code && code <= 0x3243) return UnicodeRange.JapaneseAuxiliary; + if (0x3280 <= code && code <= 0x337f) return UnicodeRange.JapaneseAuxiliary; + if (0xff01 <= code && code <= 0xff5e) return UnicodeRange.JapaneseFWLatin; return UnicodeRange.Unknown; } @@ -71,11 +91,11 @@ String.prototype.parseTags = function() { return parseTags(this as string); } -String.prototype.removeAll = function(searchValue: string | RegExp) { +String.prototype.removeAll = function(searchValue) { return this.replaceAll(searchValue, ""); } -String.prototype.json = function(fallback?: any) { +String.prototype.json = function(fallback) { if (fallback) { try { return JSON.parse(this as string); @@ -87,3 +107,9 @@ String.prototype.json = function(fallback?: any) { } } +String.prototype.map = function(mapFn) { + var out = ""; + for (var char of this) out += mapFn(char); + return out; +} + |