1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
import { TokenTags, parseTags } from "../language/tags.ts";
/**
 * Global augmentation of the built-in `String` interface.
 *
 * Declares the helper methods that this module installs on
 * `String.prototype`, so they type-check on any string value.
 */
declare global {
interface String {
/**
 * @summary get UnicodeRange for character at index 0
 *
 * Only the UTF-16 code unit at index 0 is inspected; the rest of the
 * string is ignored.
 *
 * @returns the UnicodeRange category of that code unit
 */
range(): UnicodeRange;
/**
 * @summary create a RangeTally object for counting used unicode ranges in string
 * @returns a record keyed by UnicodeRange with numeric counts
 */
rangeTally(): RangeTally;
/**
 * @summary parse concatenated tag string to TokenTags
 * @returns the TokenTags produced by `parseTags` from `../language/tags.ts`
 */
parseTags(): TokenTags;
/**
 * @summary Remove all instances of a substring in a string, using a regular expression or search string
 * @param searchValue A string or regular expression to search for
 * @returns a new string in which every match has been replaced by the empty string
 */
removeAll(searchValue: string | RegExp): string;
/**
 * @summary parse string as JSON, with optional fallback value
 *
 * fallback is undefined by default. if fallback is specified, it will be
 * returned if JSON.parse throws any error. if fallback is not specified,
 * no errors will be caught.
 *
 * NOTE(review): whether a falsy fallback (`null`, `0`, `""`, `false`)
 * counts as "specified" depends on how the implementation tests for its
 * presence - confirm against the implementation.
 *
 * @param fallback return this value if parsing fails
 * @returns the parsed value, or `fallback` when parsing fails and a fallback applies
 */
json(fallback?: any): any;
}
}
/**
 * Categories that `String.prototype.range` assigns to a UTF-16 code unit.
 *
 * Each member's value is the string identifier of the category. The code
 * unit ranges quoted on the members are the ones tested by
 * `String.prototype.range`.
 */
export enum UnicodeRange {
/** Code units 0x0000-0x007f that are not singled out as Whitespace or Punctuation. */
BasicLatin = "latin",
/** Tab (0x09) and space (0x20). */
Whitespace = "any-whitespace",
/** Exclamation mark (0x21), full stop (0x2e) and question mark (0x3f). */
Punctuation = "any-punctuation",
/** Any code unit that matches none of the other categories. */
Unknown = "any-unknown",
/** Code units 0x3000-0x303f. */
JapanesePunctuation = "jp-punctuation",
/** Code units 0x3040-0x309f. */
JapaneseHiragana = "jp-hiragana",
/** Code units 0x30a0-0x30ff. */
JapaneseKatakana = "jp-katakana",
/** Code units 0xff00-0xffef. */
JapaneseFWLatinHWKatakana = "jp-full-width-latin-half-width-katakana",
/** Code units 0x4e00-0x9faf. */
JapaneseKanji = "jp-kanji",
}
/** Per-category counter: one numeric entry for every `UnicodeRange` value. */
type RangeTally = Record<UnicodeRange, number>;
/**
 * Classify the UTF-16 code unit at index 0 of this string.
 *
 * A handful of ASCII code units are classified individually as whitespace
 * or punctuation; everything else is matched against inclusive code unit
 * ranges. A code unit that matches nothing yields `UnicodeRange.Unknown`.
 *
 * @returns the `UnicodeRange` category of the first code unit
 */
String.prototype.range = function() {
  const unit = this.charCodeAt(0);

  // Individually classified code units come first: they would otherwise be
  // swallowed by the BasicLatin range below.
  switch (unit) {
    case 0x09: // tab
    case 0x20: // space
      return UnicodeRange.Whitespace;
    case 0x21: // exclamation mark
    case 0x2e: // full stop
    case 0x3f: // question mark
      return UnicodeRange.Punctuation;
  }

  // Inclusive range test on the code unit.
  const within = (low: number, high: number) => unit >= low && unit <= high;

  if (within(0x0000, 0x007f)) {
    return UnicodeRange.BasicLatin;
  }
  if (within(0x3000, 0x303f)) {
    return UnicodeRange.JapanesePunctuation;
  }
  if (within(0x3040, 0x309f)) {
    return UnicodeRange.JapaneseHiragana;
  }
  if (within(0x30a0, 0x30ff)) {
    return UnicodeRange.JapaneseKatakana;
  }
  if (within(0xff00, 0xffef)) {
    return UnicodeRange.JapaneseFWLatinHWKatakana;
  }
  if (within(0x4e00, 0x9faf)) {
    return UnicodeRange.JapaneseKanji;
  }
  return UnicodeRange.Unknown;
}
/**
 * Count how many characters of this string fall into each `UnicodeRange`.
 *
 * The string is iterated with `for...of`, and each resulting character is
 * classified with `String.prototype.range`.
 *
 * @returns a record with one entry per `UnicodeRange` value, each holding
 *          the number of characters classified into that category (0 when
 *          the category does not occur)
 */
String.prototype.rangeTally = function() {
  const tally = {} as RangeTally;

  // Seed with the enum VALUES ("latin", "jp-kanji", ...): those are what
  // range() returns. Object.keys would yield the member names instead, so
  // the increments below would hit missing entries and produce NaN.
  for (const range of Object.values(UnicodeRange)) {
    tally[range] = 0;
  }

  for (const char of this) {
    tally[char.range()]++;
  }
  return tally;
};
/**
 * Parse this string as a concatenated tag string.
 *
 * Delegates to the `parseTags` function imported from
 * `../language/tags.ts`, passing this string as its only argument.
 *
 * @returns whatever the imported `parseTags` returns for this string
 */
String.prototype.parseTags = function() {
  const tagString = this as string;
  return parseTags(tagString);
}
/**
 * Remove every occurrence of a substring or pattern from this string.
 *
 * @param searchValue a literal string, or a regular expression, to remove.
 *        A regular expression without the `g` flag is treated as if it had
 *        it; the caller's RegExp object is not modified.
 * @returns a new string with every match replaced by the empty string
 */
String.prototype.removeAll = function(searchValue: string | RegExp) {
  // replaceAll throws a TypeError for a non-global RegExp, so run a global
  // copy of the pattern instead of the caller's object.
  const pattern =
    searchValue instanceof RegExp && !searchValue.global
      ? new RegExp(searchValue.source, searchValue.flags + "g")
      : searchValue;
  return this.replaceAll(pattern, "");
}
/**
 * Parse this string as JSON, optionally falling back to a default value.
 *
 * @param fallback value to return when `JSON.parse` throws. Any value other
 *        than `undefined` counts as a fallback, including falsy ones such
 *        as `null`, `0`, `""` and `false`.
 * @returns the parsed value, or `fallback` when parsing fails and a
 *          fallback was given
 * @throws whatever `JSON.parse` throws when parsing fails and `fallback`
 *         is `undefined`
 */
String.prototype.json = function(fallback?: any) {
  // No fallback: let the parse error reach the caller.
  if (fallback === undefined) {
    return JSON.parse(this as string);
  }

  // Compare against undefined rather than testing truthiness, so that an
  // explicitly supplied falsy fallback is still honoured.
  try {
    return JSON.parse(this as string);
  } catch {
    return fallback;
  }
}
|