aboutsummaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-06-28 23:59:50 +0200
committerlonkaars <loek@pipeframe.xyz>2023-06-28 23:59:50 +0200
commit67dbb6421976254658c5e38045513129dd18187a (patch)
tree288b599d1097b26bdbcad3b6749b38e133017cf2 /util
initial public commit
Diffstat (limited to 'util')
-rw-r--r--util/array.ts17
-rw-r--r--util/error.ts7
-rw-r--r--util/readme.md10
-rw-r--r--util/set.ts17
-rw-r--r--util/string.ts74
5 files changed, 125 insertions, 0 deletions
diff --git a/util/array.ts b/util/array.ts
new file mode 100644
index 0000000..76e2a9e
--- /dev/null
+++ b/util/array.ts
@@ -0,0 +1,17 @@
+declare global {
+	/** @summary extended Array prototype functions */
+	interface Array<T> {
+		/** @summary check if any of the elements of `arr2` are included in `this` */
+		anyOf(arr2: T[]): boolean;
+		/** @summary return last element of array without removing it */
+		peek(): T;
+	}
+}
+
+/**
+ * @summary check if any of the elements of `arr2` are included in `this`
+ *
+ * Stops at the first matching element instead of collecting every match into
+ * an intermediate array.
+ *
+ * @param arr2 candidate values to look for in `this`
+ * @returns `true` if at least one element of `this` is also in `arr2`
+ */
+Array.prototype.anyOf = function(arr2) {
+	return this.some(e => arr2.includes(e));
+};
+
+/**
+ * @summary get the final element of the array, leaving the array untouched
+ *
+ * An empty array has no final element, so the result is `undefined` there.
+ */
+Array.prototype.peek = function() {
+	const lastIndex = this.length - 1;
+	return this[lastIndex];
+};
+
diff --git a/util/error.ts b/util/error.ts
new file mode 100644
index 0000000..1e9d6eb
--- /dev/null
+++ b/util/error.ts
@@ -0,0 +1,7 @@
+/** @summary `Error` subclass that reports its name as "YomikunError" */
+export default class YomikunError extends Error {
+	/**
+	 * @param message error message (defaults to an empty string)
+	 * @param args any further arguments, forwarded unchanged to the `Error` constructor
+	 */
+	constructor(message = "", ...args: any) {
+		super(message, ...args);
+		this.name = "YomikunError";
+		this.message = message;
+	}
+}
diff --git a/util/readme.md b/util/readme.md
new file mode 100644
index 0000000..e642629
--- /dev/null
+++ b/util/readme.md
@@ -0,0 +1,10 @@
+# Utilities
+
+This directory contains files that extend JavaScript built-in types with handy
+functions. To use these, simply import them:
+
+```typescript
+import "../util/array.ts"; // array extensions example
+["foo", "bar"].anyOf(["bar", "baz"]); // -> true
+```
+
diff --git a/util/set.ts b/util/set.ts
new file mode 100644
index 0000000..9790682
--- /dev/null
+++ b/util/set.ts
@@ -0,0 +1,17 @@
+declare global {
+	/** @summary extended Set prototype functions */
+	interface Set<T> {
+		/** @summary check if any of the elements of `arr2` are included in `this` */
+		anyOf(arr2: T[]): boolean;
+		/** @summary return set items as array */
+		arr(): T[];
+	}
+}
+
+/** @summary copy the items of this set into a new array, in iteration order */
+Set.prototype.arr = function() {
+	return [...this];
+};
+
+/**
+ * @summary check if any of the elements of `arr2` are included in `this`
+ *
+ * Uses the set's own membership test and returns on the first hit, so the set
+ * is never copied into an array.
+ *
+ * @param arr2 candidate values to look for in `this`
+ * @returns `true` if at least one element of `arr2` is a member of `this`
+ */
+Set.prototype.anyOf = function(arr2) {
+	for (const candidate of arr2) {
+		if (this.has(candidate)) return true;
+	}
+	return false;
+};
+
diff --git a/util/string.ts b/util/string.ts
new file mode 100644
index 0000000..d94f5a3
--- /dev/null
+++ b/util/string.ts
@@ -0,0 +1,74 @@
+import { TokenTags, TokenTag, Tag } from "../language/tags.ts";
+import JapaneseString from "../language/japanese.ts";
+
+declare global {
+ /** @summary extended String prototype functions */
+ interface String {
+ /** @summary get UnicodeRange for character at index 0 */
+ range(): UnicodeRange;
+ /** @summary create a RangeTally object for counting used unicode ranges in string */
+ rangeTally(): RangeTally;
+
+ /** @summary get JapaneseString from this string */
+ jp(): JapaneseString;
+
+ /** @summary parse concatenated tag string to TokenTags */
+ parseTags(): TokenTags;
+ }
+}
+
+/**
+ * @summary character classes returned by `String.prototype.range`
+ *
+ * The ranges noted per member are the UTF-16 code unit values that
+ * `String.prototype.range` maps to that member.
+ */
+export enum UnicodeRange {
+ BasicLatin = "latin", // 0x0000-0x007f, except the whitespace/punctuation characters below
+ Whitespace = "any-whitespace", // tab (0x09) and space (0x20)
+ Punctuation = "any-punctuation", // exclamation mark (0x21), full stop (0x2e), question mark (0x3f)
+ Unknown = "any-unknown", // fallback for everything not matched by another member
+ JapanesePunctuation = "jp-punctuation", // 0x3000-0x303f
+ JapaneseHiragana = "jp-hiragana", // 0x3040-0x309f
+ JapaneseKatakana = "jp-katakana", // 0x30a0-0x30ff
+ JapaneseFWLatinHWKatakana = "jp-full-width-latin-half-width-katakana", // 0xff00-0xffef
+ JapaneseKanji = "jp-kanji", // 0x4e00-0x9faf
+}
+
+/** @summary one numeric counter per UnicodeRange value, as returned by `String.prototype.rangeTally` */
+type RangeTally = Record<UnicodeRange, number>;
+
+/**
+ * @summary classify the character at index 0 into a UnicodeRange
+ *
+ * Only the first UTF-16 code unit is inspected. An empty string has no code
+ * unit (`charCodeAt` yields NaN), so it ends up as `UnicodeRange.Unknown`.
+ */
+String.prototype.range = function() {
+	const unit = this.charCodeAt(0);
+
+	// individually classified ASCII characters win over the generic latin block
+	switch (unit) {
+		case 0x09: // tab
+		case 0x20: // space
+			return UnicodeRange.Whitespace;
+		case 0x21: // exclamation mark
+		case 0x2e: // full stop
+		case 0x3f: // question mark
+			return UnicodeRange.Punctuation;
+	}
+
+	// the remaining blocks do not overlap, so each code unit matches at most one
+	if (unit >= 0x0000 && unit <= 0x007f) return UnicodeRange.BasicLatin;
+	if (unit >= 0x3000 && unit <= 0x303f) return UnicodeRange.JapanesePunctuation;
+	if (unit >= 0x3040 && unit <= 0x309f) return UnicodeRange.JapaneseHiragana;
+	if (unit >= 0x30a0 && unit <= 0x30ff) return UnicodeRange.JapaneseKatakana;
+	if (unit >= 0xff00 && unit <= 0xffef) return UnicodeRange.JapaneseFWLatinHWKatakana;
+	if (unit >= 0x4e00 && unit <= 0x9faf) return UnicodeRange.JapaneseKanji;
+
+	return UnicodeRange.Unknown;
+};
+
+/**
+ * @summary create a RangeTally object for counting used unicode ranges in string
+ *
+ * Every UnicodeRange value starts at 0 and is incremented once per character
+ * (as iterated by `for...of`) that `range()` classifies into it.
+ *
+ * @returns counters keyed by UnicodeRange value
+ */
+String.prototype.rangeTally = function() {
+	// seed with the enum *values*: those are what `range()` returns. Seeding
+	// with the member names left the real counters undefined, so they became NaN.
+	const tally = Object.fromEntries(
+		Object.values(UnicodeRange).map(range => [range, 0]),
+	) as RangeTally;
+	for (const char of this) tally[char.range()]++;
+	return tally;
+};
+
+/** @summary wrap this string in a new JapaneseString */
+String.prototype.jp = function() {
+	const wrapped = new JapaneseString(this);
+	return wrapped;
+};
+
+/**
+ * @summary parse concatenated tag string to TokenTags
+ *
+ * Tags are separated by one or more spaces and surrounding whitespace is
+ * ignored. An empty or whitespace-only string yields an empty set instead of
+ * a set holding an empty tag.
+ *
+ * @returns set of the parsed tags, minus past tense tags that follow a -te or
+ * -tari tag
+ */
+String.prototype.parseTags = function() {
+	// "".split(...) yields [""], which is not a tag: drop empty fragments
+	const tags = this.trim().split(/ +/).filter(tag => tag.length > 0) as TokenTag[];
+	const filteredTags: TokenTag[] = [];
+	// becomes true once a -te or -tari tag has been kept
+	let seenTeOrTari = false;
+	for (const tag of tags) {
+		// skip past tense tags after -te and -tari deinflection
+		if (seenTeOrTari && tag === Tag.Inflection.Tense.Past) continue;
+
+		filteredTags.push(tag);
+		if (tag === Tag.Inflection.Suffix.Te || tag === Tag.Inflection.Suffix.Tari) seenTeOrTari = true;
+	}
+	return new Set(filteredTags) as TokenTags;
+};
+