import { TokenTags, parseTags } from "../language/tags.ts";
import JapaneseString from "../language/japanese.ts";

declare global {
  /** @summary extended String prototype functions */
  interface String {
    /** @summary get the UnicodeRange of the character at index 0 */
    range(): UnicodeRange;

    /** @summary create a RangeTally counting how often each unicode range is used in this string */
    rangeTally(): RangeTally;

    /** @summary get a JapaneseString wrapping this string */
    jp(): JapaneseString;

    /** @summary parse a concatenated tag string to TokenTags */
    parseTags(): TokenTags;

    /**
     * @summary Remove all instances of a substring in a string, using a regular expression or search string
     * @param searchValue A string or regular expression to search for
     */
    removeAll(searchValue: string | RegExp): string;

    /**
     * @summary parse string as JSON, with optional fallback value
     *
     * fallback is undefined by default. if fallback is specified (anything
     * other than undefined, including falsy values such as 0, "", false or
     * null), it will be returned if JSON.parse throws any error. if fallback
     * is not specified, no errors will be caught.
     *
     * @param fallback return this value if parsing fails
     */
    json(fallback?: any): any;
  }
}

/** Classification labels returned by `String.prototype.range`. */
export enum UnicodeRange {
  BasicLatin = "latin",
  Whitespace = "any-whitespace",
  Punctuation = "any-punctuation",
  Unknown = "any-unknown",
  JapanesePunctuation = "jp-punctuation",
  JapaneseHiragana = "jp-hiragana",
  JapaneseKatakana = "jp-katakana",
  JapaneseFWLatinHWKatakana = "jp-full-width-latin-half-width-katakana",
  JapaneseKanji = "jp-kanji",
}

/** Per-range character counts, keyed by the UnicodeRange *values*. */
type RangeTally = Record<UnicodeRange, number>;

/**
 * Classify the UTF-16 code unit at index 0 of this string.
 *
 * The specific whitespace/punctuation checks run before the generic
 * Basic Latin range check, so they take precedence over it. Only tab and
 * space are reported as whitespace; every other code unit up to 0x7f
 * (newline included) is reported as BasicLatin.
 *
 * @returns the matching UnicodeRange, or UnicodeRange.Unknown when nothing
 *          matches (this includes the empty string, where charCodeAt yields NaN)
 */
String.prototype.range = function (): UnicodeRange {
  const code = this.charCodeAt(0);

  if (code === 0x09) return UnicodeRange.Whitespace; // tab
  if (code === 0x20) return UnicodeRange.Whitespace; // space
  if (code === 0x21) return UnicodeRange.Punctuation; // exclamation mark
  if (code === 0x2e) return UnicodeRange.Punctuation; // full stop
  if (code === 0x3f) return UnicodeRange.Punctuation; // question mark

  if (0x0000 <= code && code <= 0x007f) return UnicodeRange.BasicLatin;
  if (0x3000 <= code && code <= 0x303f) return UnicodeRange.JapanesePunctuation;
  if (0x3040 <= code && code <= 0x309f) return UnicodeRange.JapaneseHiragana;
  if (0x30a0 <= code && code <= 0x30ff) return UnicodeRange.JapaneseKatakana;
  if (0xff00 <= code && code <= 0xffef) return UnicodeRange.JapaneseFWLatinHWKatakana;
  if (0x4e00 <= code && code <= 0x9faf) return UnicodeRange.JapaneseKanji;

  return UnicodeRange.Unknown;
};

/**
 * Count how many characters of this string fall into each UnicodeRange.
 *
 * @returns an object with one counter per UnicodeRange value; ranges that
 *          do not occur in the string are present with a count of 0
 */
String.prototype.rangeTally = function (): RangeTally {
  const tally = {} as RangeTally;

  // Seed with the enum *values* ("latin", "jp-kanji", ...): those are what
  // range() returns. Object.keys would yield the member names instead, and
  // incrementing the then-missing value keys would produce NaN.
  for (const range of Object.values(UnicodeRange)) tally[range] = 0;

  for (const char of this) tally[char.range()]++;

  return tally;
};

/** Wrap this string in a JapaneseString. */
String.prototype.jp = function () {
  return new JapaneseString(this);
};

/** Parse this string as a concatenated tag string via `parseTags`. */
String.prototype.parseTags = function () {
  return parseTags(this as string);
};

/**
 * Remove every match of `searchValue` from this string.
 *
 * Delegates to `replaceAll`, so a RegExp argument must carry the global
 * flag; `replaceAll` throws a TypeError otherwise.
 *
 * @param searchValue substring or global regular expression to strip out
 * @returns a new string with all matches removed
 */
String.prototype.removeAll = function (searchValue: string | RegExp): string {
  return this.replaceAll(searchValue, "");
};

/**
 * Parse this string as JSON.
 *
 * @param fallback value to return when parsing fails; when omitted (or
 *                 undefined) parse errors are not caught
 * @returns the parsed value, or `fallback` if one was given and parsing threw
 * @throws SyntaxError from JSON.parse when no fallback was given
 */
String.prototype.json = function (fallback?: any): any {
  // Compare against undefined rather than testing truthiness so that falsy
  // fallbacks (0, "", false, null) are honoured.
  if (fallback === undefined) return JSON.parse(this as string);

  try {
    return JSON.parse(this as string);
  } catch {
    return fallback;
  }
};