about | summary | refs | log | tree | commit | diff
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r-- util/array.ts 14
-rw-r--r-- util/japanese.ts 133
-rw-r--r-- util/number.ts 11
-rw-r--r-- util/string.ts 48
4 files changed, 192 insertions, 14 deletions
diff --git a/util/array.ts b/util/array.ts
index f032935..c5a26c6 100644
--- a/util/array.ts
+++ b/util/array.ts
@@ -1,3 +1,5 @@
+import "./set.ts";
+
declare global {
interface Array<T> {
/** @summary check if any of the elements of `arr2` are included in `this` */
@@ -8,6 +10,10 @@ declare global {
set(): Set<T>;
/** @summary clear array */
clear(): void;
+ /** @summary filter duplicates from array */
+ filterDuplicates(): Array<T>;
+ /** @summary `true` if the array doesn't contain duplicate items */
+ isUniq(): boolean;
}
}
@@ -27,3 +33,11 @@ Array.prototype.clear = function() {
while (this.length > 0) this.pop();
}
+/** Remove duplicates by passing the array through `set()` and back via `arr()`. */
+Array.prototype.filterDuplicates = function() {
+	const distinct = this.set(); // TODO: optimize this
+	return distinct.arr();
+}
+
+/** `true` when dropping duplicates leaves the element count unchanged. */
+Array.prototype.isUniq = function() {
+	const deduped = this.filterDuplicates();
+	return deduped.length === this.length;
+}
+
diff --git a/util/japanese.ts b/util/japanese.ts
index 2017280..d398b60 100644
--- a/util/japanese.ts
+++ b/util/japanese.ts
@@ -1,4 +1,5 @@
import { UnicodeRange } from "./string.ts";
+import "./number.ts";
declare global {
interface String {
@@ -57,6 +58,15 @@ declare global {
* `strict` to true
*/
japaneseOnly(strict?: boolean): boolean
+
+ /** @summary convert any half-width katakana to full-width */
+ widenKatakana(): string;
+
+ /** @summary convert any full-width katakana to hiragana */
+ katakanaToHiragana(): string;
+
+ /** @summary convert any kana (full and half-width) to full-width hiragana */
+ normalizeKana(): string;
}
}
@@ -85,7 +95,7 @@ function stringOnly(input: string, check: (key: string, val: number) => StringOn
String.prototype.hiraganaOnly = function(strict = false) {
return stringOnly(this as string, (key, val) => {
- if (key == UnicodeRange.JapaneseHiragana)
+ if (key == UnicodeRange.JapaneseFWHiragana)
return StringOnlyReturnValue.TallyAdd; // count hiragana characters
else if (!strict && key.startsWith("any-"))
return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
@@ -97,7 +107,7 @@ String.prototype.hiraganaOnly = function(strict = false) {
String.prototype.katakanaOnly = function(strict = false) {
return stringOnly(this as string, (key, val) => {
- if (key == UnicodeRange.JapaneseKatakana)
+ if ([UnicodeRange.JapaneseHWKatakana, UnicodeRange.JapaneseFWKatakana].includes(key as UnicodeRange))
return StringOnlyReturnValue.TallyAdd; // count katakana characters
else if (!strict && key.startsWith("any-"))
return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
@@ -121,7 +131,7 @@ String.prototype.kanjiOnly = function(strict = false) {
String.prototype.kanaOnly = function(strict = false) {
return stringOnly(this as string, (key, val) => {
- if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana)
+ if ([UnicodeRange.JapaneseHWKatakana, UnicodeRange.JapaneseFWKatakana, UnicodeRange.JapaneseFWHiragana].includes(key as UnicodeRange))
return StringOnlyReturnValue.TallyAdd; // count kana characters
else if (!strict && key.startsWith("any-"))
return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
@@ -143,3 +153,120 @@ String.prototype.japaneseOnly = function(strict = false) {
});
}
+/**
+ * Convert half-width katakana to full-width katakana.
+ *
+ * A half-width letter followed by a half-width voiced (U+FF9E) or semi-voiced
+ * (U+FF9F) sound mark is folded into the single voiced full-width letter when
+ * one exists (e.g. U+FF76 U+FF9E becomes U+30AC). Pairs without a precomposed
+ * letter come back as the full-width base letter plus a combining sound mark.
+ * Everything else in the string is returned unchanged, including sound marks
+ * that do not follow a half-width katakana letter.
+ */
+String.prototype.widenKatakana = function() {
+	// Half-width katakana letters occupy U+FF66..U+FF9D (this includes the
+	// prolonged sound mark U+FF70). The pattern is written with escapes rather
+	// than literal characters so the half-width forms cannot be silently folded
+	// into their full-width look-alikes by tooling that normalizes this file.
+	const halfWidthKatakana = /[\uff66-\uff9d][\uff9e\uff9f]?/g;
+
+	// NFKC maps each half-width form to its full-width counterpart and composes
+	// base + sound mark. The longest match (letter + mark) is always tried
+	// first, so voiced forms are never shadowed by their base letter.
+	// Normalizing only the matches keeps full-width latin, punctuation and the
+	// rest of the string untouched.
+	return this.replace(halfWidthKatakana, match => match.normalize("NFKC"));
+}
+
+/**
+ * Replace each character whose code point lies in 0x30a1..0x30f6 with the
+ * character at the same position in the block starting at 0x3041; all other
+ * characters are passed through unchanged.
+ */
+String.prototype.katakanaToHiragana = function() {
+	// distance from the katakana block start to the hiragana block start
+	const offset = 0x3041 - 0x30a1;
+	return this.map(char => {
+		const code = char.codePointAt(0)!;
+		const isKatakana = code >= 0x30a1 && code <= 0x30f6;
+		return isKatakana ? (code + offset).toChar() : char;
+	})
+}
+
+/** Apply `widenKatakana()` first, then `katakanaToHiragana()` on its result. */
+String.prototype.normalizeKana = function() {
+	const widened = this.widenKatakana();
+	return widened.katakanaToHiragana();
+}
+
diff --git a/util/number.ts b/util/number.ts
new file mode 100644
index 0000000..c28864f
--- /dev/null
+++ b/util/number.ts
@@ -0,0 +1,11 @@
+declare global {
+	interface Number {
+		/**
+		 * @summary convert number to character by char code; integers above
+		 * 0xffff (up to 0x10ffff) are treated as Unicode code points
+		 */
+		toChar(): string;
+	}
+}
+
+Number.prototype.toChar = function() {
+	// valueOf() yields the primitive even if `this` arrives as a boxed Number
+	const code = this.valueOf();
+	// String.fromCharCode keeps only the low 16 bits, so an astral code point
+	// would come back as an unrelated character; emit the surrogate pair
+	// instead. Every other input keeps the original fromCharCode behaviour.
+	const isAstral = Number.isInteger(code) && code > 0xffff && code <= 0x10ffff;
+	return isAstral ? String.fromCodePoint(code) : String.fromCharCode(code);
+}
+
diff --git a/util/string.ts b/util/string.ts
index 397dcd6..327b884 100644
--- a/util/string.ts
+++ b/util/string.ts
@@ -26,6 +26,12 @@ declare global {
* @argument fallback return this value if parsing fails
*/
json(fallback?: any): any;
+
+ /**
+ * @summary map each character of a string to another character using
+ * `mapFn`
+ */
+ map(mapFn: (char: string) => string): string;
}
}
@@ -34,11 +40,15 @@ export enum UnicodeRange {
Whitespace = "any-whitespace",
Punctuation = "any-punctuation",
Unknown = "any-unknown",
- JapanesePunctuation = "jp-punctuation",
- JapaneseHiragana = "jp-hiragana",
- JapaneseKatakana = "jp-katakana",
- JapaneseFWLatinHWKatakana = "jp-full-width-latin-half-width-katakana",
+ JapaneseFWPunctuation = "jp-full-width-punctuation",
+ JapaneseHWPunctuation = "jp-half-width-punctuation",
+ JapaneseFWHiragana = "jp-full-width-hiragana",
+ JapaneseFWKatakana = "jp-full-width-katakana",
+ JapaneseFWLatin = "jp-full-width-latin",
+ JapaneseHWKatakana = "jp-half-width-katakana",
JapaneseKanji = "jp-kanji",
+ JapaneseKanjiRadicals = "jp-kanji-radicals",
+ JapaneseAuxiliary = "jp-aux",
}
type RangeTally = Record<UnicodeRange, number>;
@@ -52,12 +62,22 @@ String.prototype.range = function() {
if (0x2e == code) return UnicodeRange.Punctuation; // full stop
if (0x3f == code) return UnicodeRange.Punctuation; // question mark
+ // https://stackoverflow.com/a/53807563
if (0x0000 <= code && code <= 0x007f) return UnicodeRange.BasicLatin;
- if (0x3000 <= code && code <= 0x303f) return UnicodeRange.JapanesePunctuation;
- if (0x3040 <= code && code <= 0x309f) return UnicodeRange.JapaneseHiragana;
- if (0x30a0 <= code && code <= 0x30ff) return UnicodeRange.JapaneseKatakana;
- if (0xff00 <= code && code <= 0xffef) return UnicodeRange.JapaneseFWLatinHWKatakana;
- if (0x4e00 <= code && code <= 0x9faf) return UnicodeRange.JapaneseKanji;
+ if (0x2e80 <= code && code <= 0x2fd5) return UnicodeRange.JapaneseKanjiRadicals;
+ if (0x3000 <= code && code <= 0x303f) return UnicodeRange.JapaneseFWPunctuation;
+ if (0xff5f <= code && code <= 0xff60) return UnicodeRange.JapaneseFWPunctuation;
+ if (0x3041 <= code && code <= 0x3096) return UnicodeRange.JapaneseFWHiragana;
+ if (0x30a1 <= code && code <= 0x30ff) return UnicodeRange.JapaneseFWKatakana;
+ if (0x3400 <= code && code <= 0x4db5) return UnicodeRange.JapaneseKanji;
+ if (0x4e00 <= code && code <= 0x9fcb) return UnicodeRange.JapaneseKanji;
+ if (0xf900 <= code && code <= 0xfa6a) return UnicodeRange.JapaneseKanji;
+ if (0xff61 <= code && code <= 0xff65) return UnicodeRange.JapaneseHWPunctuation;
+ if (0xff66 <= code && code <= 0xff9f) return UnicodeRange.JapaneseHWKatakana;
+ if (0x31f0 <= code && code <= 0x31ff) return UnicodeRange.JapaneseAuxiliary;
+ if (0x3220 <= code && code <= 0x3243) return UnicodeRange.JapaneseAuxiliary;
+ if (0x3280 <= code && code <= 0x337f) return UnicodeRange.JapaneseAuxiliary;
+ if (0xff01 <= code && code <= 0xff5e) return UnicodeRange.JapaneseFWLatin;
return UnicodeRange.Unknown;
}
@@ -71,11 +91,11 @@ String.prototype.parseTags = function() {
return parseTags(this as string);
}
-String.prototype.removeAll = function(searchValue: string | RegExp) {
+String.prototype.removeAll = function(searchValue) {
return this.replaceAll(searchValue, "");
}
-String.prototype.json = function(fallback?: any) {
+String.prototype.json = function(fallback) {
if (fallback) {
try {
return JSON.parse(this as string);
@@ -87,3 +107,9 @@ String.prototype.json = function(fallback?: any) {
}
}
+/**
+ * Call `mapFn` once per character (iterating by code point, so a surrogate
+ * pair is handed over as one character) and concatenate the results in order.
+ */
+String.prototype.map = function(mapFn) {
+	return [...this].reduce((acc, ch) => acc + mapFn(ch), "");
+}
+