diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-06-30 23:23:47 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-06-30 23:23:47 +0200 |
commit | 8ff39cbe6300ca479584fe7d85ff03a1f65bc9b0 (patch) | |
tree | c354294dbbeb6c86d5909955762326272664f07b /util | |
parent | 722127ef4059020876f708b1d5406c04fd07b0da (diff) |
WIP move more stuff around + more broken examples
Diffstat (limited to 'util')
-rw-r--r-- | util/japanese.ts | 145 | ||||
-rw-r--r-- | util/string.ts | 9 |
2 files changed, 145 insertions, 9 deletions
diff --git a/util/japanese.ts b/util/japanese.ts new file mode 100644 index 0000000..2017280 --- /dev/null +++ b/util/japanese.ts @@ -0,0 +1,145 @@ +import { UnicodeRange } from "./string.ts"; + +declare global { + interface String { + /** + * @summary check if string is hiragana only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one hiragana character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as hiragana. this behavior can be turned off by setting + * `strict` to true + */ + hiraganaOnly(strict?: boolean): boolean + /** + * @summary check if string is katakana only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one katakana character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as katakana. this behavior can be turned off by setting + * `strict` to true + */ + katakanaOnly(strict?: boolean): boolean + /** + * @summary check if string is kanji only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one kanji character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as kanji. this behavior can be turned off by setting + * `strict` to true + */ + kanjiOnly(strict?: boolean): boolean + /** + * @summary check if string is kana only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one kana character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as kana. this behavior can be turned off by setting `strict` + * to true + */ + kanaOnly(strict?: boolean): boolean + /** + * @summary check if string is japanese only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one japanese character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as japanese. this behavior can be turned off by setting + * `strict` to true + */ + japaneseOnly(strict?: boolean): boolean + } +} + +enum StringOnlyReturnValue { + TallyAdd, + TallyIgnore, + TallyStop, +} + +/** @summary check tally for allowed scripts (internal use only) */ +function stringOnly(input: string, check: (key: string, val: number) => StringOnlyReturnValue): boolean { + var tally = input.rangeTally(); + var ok = false; + for (var [key, val] of Object.entries(tally)) { + switch(check(key, val)) { + case StringOnlyReturnValue.TallyAdd: { + ok = true; + break; + } + case StringOnlyReturnValue.TallyIgnore: { break; } + case StringOnlyReturnValue.TallyStop: { return false; } + } + } + return ok; +} + +String.prototype.hiraganaOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseHiragana) + return StringOnlyReturnValue.TallyAdd; // count hiragana characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.katakanaOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseKatakana) + return StringOnlyReturnValue.TallyAdd; // count katakana characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.kanjiOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseKanji) + return StringOnlyReturnValue.TallyAdd; // count kanji characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.kanaOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana) + return StringOnlyReturnValue.TallyAdd; // count kana characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.japaneseOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key.startsWith("jp-")) + return StringOnlyReturnValue.TallyAdd; // count japanese characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + diff --git a/util/string.ts b/util/string.ts index 16d8f0a..397dcd6 100644 --- a/util/string.ts +++ b/util/string.ts @@ -1,17 +1,12 @@ import { TokenTags, parseTags } from "../language/tags.ts"; -import JapaneseString from "../language/japanese.ts"; declare global { - /** @summary extended String prototype functions */ interface String { /** @summary get UnicodeRange for character at index 0 */ range(): UnicodeRange; /** @summary create a RangeTally object for counting used unicode ranges in string */ rangeTally(): RangeTally; - /** @summary get JapaneseString from this string */ - jp(): JapaneseString; - /** @summary parse concatenated tag string to TokenTags */ parseTags(): TokenTags; @@ -72,10 +67,6 @@ String.prototype.rangeTally = function() { return tally; }; -String.prototype.jp = function() { - return new JapaneseString(this); -} - String.prototype.parseTags = function() { return parseTags(this as string); } |