diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-06-28 23:59:50 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-06-28 23:59:50 +0200 |
commit | 67dbb6421976254658c5e38045513129dd18187a (patch) | |
tree | 288b599d1097b26bdbcad3b6749b38e133017cf2 /util |
initial public commit
Diffstat (limited to 'util')
-rw-r--r-- | util/array.ts | 17 | ||||
-rw-r--r-- | util/error.ts | 7 | ||||
-rw-r--r-- | util/readme.md | 10 | ||||
-rw-r--r-- | util/set.ts | 17 | ||||
-rw-r--r-- | util/string.ts | 74 |
5 files changed, 125 insertions, 0 deletions
// ---------------------------------------------------------------------------
// util/array.ts (new file, mode 100644)
// ---------------------------------------------------------------------------

declare global {
  interface Array<T> {
    anyOf(arr2: Array<T>): boolean;
    peek(): T;
  }
}

/**
 * @summary check if any of the elements of `arr2` are included in `this`
 *
 * Elements are compared the same way `Array.prototype.includes` compares them.
 * Returns `false` when either array is empty.
 */
Object.defineProperty(Array.prototype, "anyOf", {
  // Non-enumerable, so `for...in` over an array keeps yielding indices only.
  enumerable: false,
  configurable: true,
  writable: true,
  value: function <T>(this: T[], arr2: T[]): boolean {
    // `some` stops at the first match instead of collecting every match.
    return this.some((element) => arr2.includes(element));
  },
});

/**
 * @summary return last element of array without removing it
 *
 * Evaluates to `undefined` at runtime when the array is empty.
 */
Object.defineProperty(Array.prototype, "peek", {
  // Non-enumerable, so `for...in` over an array keeps yielding indices only.
  enumerable: false,
  configurable: true,
  writable: true,
  value: function <T>(this: T[]): T {
    return this[this.length - 1];
  },
});

// ---------------------------------------------------------------------------
// util/error.ts (new file, mode 100644)
// ---------------------------------------------------------------------------

/**
 * @summary `Error` subclass whose `name` is `"YomikunError"`
 *
 * `message` defaults to the empty string; any further constructor arguments
 * are forwarded unchanged to the `Error` constructor.
 */
export default class YomikunError extends Error {
  constructor(message = "", ...args: any) {
    super(message, ...args);
    this.message = message;
    this.name = "YomikunError";
  }
}

/*
 * util/readme.md (new file, mode 100644)
 *
 * # Utilities
 *
 * This directory contains files that extend JavaScript built-in types with
 * handy functions.
 */
/*
 * util/readme.md (continued)
 *
 * To use these, simply import them:
 *
 * ```typescript
 * import "../util/array.ts"; // array extensions example
 * ["foo", "bar"].anyOf(["bar", "baz"]); // -> true
 * ```
 */

// ---------------------------------------------------------------------------
// util/set.ts (new file, mode 100644)
// ---------------------------------------------------------------------------

declare global {
  interface Set<T> {
    anyOf(arr2: Array<T>): boolean;
    arr(): Array<T>;
  }
}

/** @summary return set items as array */
Set.prototype.arr = function () {
  return Array.from(this);
};

/**
 * @summary check if any of the elements of `arr2` are included in `this`
 *
 * Uses `Set.prototype.has` for the membership test, so the set is not copied
 * into an array first. Returns `false` when `arr2` or the set is empty.
 */
Set.prototype.anyOf = function (arr2) {
  return arr2.some((element) => this.has(element));
};

// ---------------------------------------------------------------------------
// util/string.ts (new file, mode 100644)
// ---------------------------------------------------------------------------

import { TokenTags, TokenTag, Tag } from "../language/tags.ts";
import JapaneseString from "../language/japanese.ts";

declare global {
  /** @summary extended String prototype functions */
  interface String {
    range(): UnicodeRange;
    rangeTally(): RangeTally;

    jp(): JapaneseString;

    parseTags(): TokenTags;
  }
}

/** @summary character classes reported by `String.prototype.range` */
export enum UnicodeRange {
  BasicLatin = "latin",
  Whitespace = "any-whitespace",
  Punctuation = "any-punctuation",
  Unknown = "any-unknown",
  JapanesePunctuation = "jp-punctuation",
  JapaneseHiragana = "jp-hiragana",
  JapaneseKatakana = "jp-katakana",
  JapaneseFWLatinHWKatakana = "jp-full-width-latin-half-width-katakana",
  JapaneseKanji = "jp-kanji",
}

/** @summary number of characters counted per `UnicodeRange` value */
type RangeTally = Record<UnicodeRange, number>;

/**
 * @summary get UnicodeRange for character at index 0
 *
 * Classification is based on the UTF-16 code unit at index 0. The single-code
 * checks run before the range checks, so tab/space and `!`, `.`, `?` are
 * reported as whitespace/punctuation rather than as basic latin. An empty
 * string (code unit `NaN`) and any code unit outside the listed ranges yield
 * `UnicodeRange.Unknown`.
 */
String.prototype.range = function () {
  const code = this.charCodeAt(0);

  switch (code) {
    case 0x09: // tab
    case 0x20: // space
      return UnicodeRange.Whitespace;
    case 0x21: // exclamation mark
    case 0x2e: // full stop
    case 0x3f: // question mark
      return UnicodeRange.Punctuation;
  }

  if (0x0000 <= code && code <= 0x007f) return UnicodeRange.BasicLatin;
  if (0x3000 <= code && code <= 0x303f) return UnicodeRange.JapanesePunctuation;
  if (0x3040 <= code && code <= 0x309f) return UnicodeRange.JapaneseHiragana;
  if (0x30a0 <= code && code <= 0x30ff) return UnicodeRange.JapaneseKatakana;
  if (0xff00 <= code && code <= 0xffef) return UnicodeRange.JapaneseFWLatinHWKatakana;
  if (0x4e00 <= code && code <= 0x9faf) return UnicodeRange.JapaneseKanji;
  return UnicodeRange.Unknown;
};

/**
 * @summary create a RangeTally object for counting used unicode ranges in string
 *
 * Every `UnicodeRange` value is present in the result, starting at 0. The
 * string is iterated with `for...of`, and each yielded character is classified
 * with `range()`.
 */
String.prototype.rangeTally = function () {
  const tally = {} as RangeTally;
  // Seed with the enum *values* ("latin", "jp-kanji", ...): those are what
  // `range()` returns. `Object.keys` would give the member names instead and
  // every increment below would produce NaN.
  for (const range of Object.values(UnicodeRange)) tally[range] = 0;
  for (const char of this) tally[char.range()]++;
  return tally;
};

/** @summary get JapaneseString from this string */
String.prototype.jp = function () {
  return new JapaneseString(this);
};

/**
 * @summary parse concatenated tag string to TokenTags
 *
 * Tags are separated by one or more spaces; surrounding whitespace is ignored.
 * An empty or blank string yields an empty set. A past tense tag is dropped
 * when a -te or -tari suffix tag has already been kept.
 */
String.prototype.parseTags = function () {
  const tags = this.trim()
    .split(/ +/)
    // splitting an empty string yields [""], which is not a tag
    .filter((tag) => tag.length > 0) as TokenTag[];

  const filteredTags: TokenTag[] = [];
  for (const tag of tags) {
    // skip past tense tags after -te and -tari deinflection
    // (built-in `some` keeps this file independent of the util/array.ts extensions)
    const afterTeOrTari = filteredTags.some(
      (kept) => kept === Tag.Inflection.Suffix.Te || kept === Tag.Inflection.Suffix.Tari,
    );
    if (tag === Tag.Inflection.Tense.Past && afterTeOrTari) continue;

    filteredTags.push(tag);
  }
  return new Set(filteredTags) as TokenTags;
};