1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
import { TokenTags, parseTags } from "../search/tags.ts";
import { Wrapper } from "./wrap.ts";
/**
* Augments the built-in String interface with the helper methods that this
* file assigns onto String.prototype.
*/
declare global {
interface String {
/**
* @summary get UnicodeRange for character at index 0
*
* The implementation classifies the UTF-16 code unit returned by
* `charCodeAt(0)`.
*/
range(): UnicodeRange;
/** @summary create a RangeTally object for counting used unicode ranges in string */
rangeTally(): RangeTally;
/** @summary parse concatenated tag string to TokenTags */
parseTags(): TokenTags;
/**
* @summary Remove all instances of a substring in a string, using a regular expression or search string
* @param searchValue A string or regular expression to search for
*/
removeAll(searchValue: string | RegExp): string;
/**
* @summary parse string as JSON, with optional fallback value
*
* fallback is undefined by default. if fallback is specified, it will be
* returned if JSON.parse throws any error. if fallback is not specified,
* no errors will be caught.
*
* @param fallback return this value if parsing fails
*/
json(fallback?: any): any;
/**
* @summary map each character of a string to another string using
* `mapFn` and concatenate the results
*
* @param mapFn called once per character; its return value is appended to
* the output
*/
map(mapFn: (char: string) => string): string;
/**
* @summary return length of the match of `searchString` starting at
* `position` (default: 0)
*
* Similar to String.prototype.startsWith, but returns the length of the
* match instead of a boolean true or false.
*
* @param searchString string to search for
* @param position index to search from (0 by default = start of string)
*/
cmpLen(searchString: string, position?: number): number;
/** @summary remove `length` characters from end of string */
removeEnd(length: number): string;
/**
* @summary replace last instance of `searchString` with `replaceValue`
*
* The string is returned unchanged when `searchString` does not occur.
*/
replaceLast(searchString: string, replaceValue: string): string;
/**
* @summary wrap string using Wrapper
*
* The result is `wrapper[0]`, then this string, then `wrapper[1]`.
*/
wrap(wrapper: Wrapper): string;
}
}
/**
* Classification labels returned by `String.prototype.range`. The string
* values (not the member names) are the keys of a `RangeTally`.
*
* In member names FW means full-width and HW means half-width, as spelled out
* in the corresponding values.
*/
export enum UnicodeRange {
// generic classes
BasicLatin = "latin",
Whitespace = "any-whitespace",
Punctuation = "any-punctuation",
// returned by range() when no other class matches
Unknown = "any-unknown",
// Japanese-specific classes
JapaneseFWPunctuation = "jp-full-width-punctuation",
JapaneseHWPunctuation = "jp-half-width-punctuation",
JapaneseFWHiragana = "jp-full-width-hiragana",
JapaneseFWKatakana = "jp-full-width-katakana",
JapaneseFWLatin = "jp-full-width-latin",
JapaneseHWKatakana = "jp-half-width-katakana",
JapaneseKanji = "jp-kanji",
JapaneseKanjiRadicals = "jp-kanji-radicals",
JapaneseAuxiliary = "jp-aux",
}
/** One numeric counter per UnicodeRange value; the return type of `String.prototype.rangeTally`. */
type RangeTally = Record<UnicodeRange, number>;
/**
 * Classify the UTF-16 code unit at index 0 of this string as a UnicodeRange.
 *
 * A few ASCII code points are singled out as whitespace or punctuation before
 * the general ranges are consulted. Anything that matches no range (including
 * the empty string, whose code is NaN) is reported as Unknown.
 *
 * @returns the UnicodeRange label for the first code unit
 */
String.prototype.range = function() {
	const unit = this.charCodeAt(0);
	// inclusive range test against the code unit
	const within = (low: number, high: number) => low <= unit && unit <= high;

	// ASCII special cases win over the Basic Latin range below
	switch (unit) {
		case 0x09: // tab
		case 0x20: // space
			return UnicodeRange.Whitespace;
		case 0x21: // exclamation mark
		case 0x2e: // full stop
		case 0x3f: // question mark
			return UnicodeRange.Punctuation;
	}

	// https://stackoverflow.com/a/53807563
	// The ranges are disjoint, so checks for the same label are grouped.
	if (within(0x0000, 0x007f)) return UnicodeRange.BasicLatin;
	if (within(0x2e80, 0x2fd5)) return UnicodeRange.JapaneseKanjiRadicals;
	if (within(0x3000, 0x303f) || within(0xff5f, 0xff60)) return UnicodeRange.JapaneseFWPunctuation;
	if (within(0x3041, 0x3096)) return UnicodeRange.JapaneseFWHiragana;
	if (within(0x30a1, 0x30ff)) return UnicodeRange.JapaneseFWKatakana;
	if (within(0x3400, 0x4db5) || within(0x4e00, 0x9fcb) || within(0xf900, 0xfa6a)) return UnicodeRange.JapaneseKanji;
	if (within(0xff61, 0xff65)) return UnicodeRange.JapaneseHWPunctuation;
	if (within(0xff66, 0xff9f)) return UnicodeRange.JapaneseHWKatakana;
	if (within(0x31f0, 0x31ff) || within(0x3220, 0x3243) || within(0x3280, 0x337f)) return UnicodeRange.JapaneseAuxiliary;
	if (within(0xff01, 0xff5e)) return UnicodeRange.JapaneseFWLatin;
	return UnicodeRange.Unknown;
}
/**
 * Count how many characters of this string fall into each UnicodeRange.
 *
 * The string is iterated with `for...of` and every character is classified
 * with `range()`.
 *
 * @returns an object with one counter per UnicodeRange value; ranges that do
 * not occur in the string are present with a count of 0
 */
String.prototype.rangeTally = function() {
	const tally = {} as RangeTally;
	// Seed with the enum VALUES (e.g. "latin"): those are what range() returns
	// and therefore the keys that get incremented. Object.keys would yield the
	// member names instead, leaving every real counter undefined -> NaN.
	for (const range of Object.values(UnicodeRange)) tally[range] = 0;
	for (const char of this) tally[char.range()]++;
	return tally;
};
/**
 * Parse this string as a concatenated tag string by handing it to the
 * imported `parseTags` function.
 *
 * @returns whatever the imported `parseTags` returns for this string
 */
String.prototype.parseTags = function() {
	const raw = this as string;
	return parseTags(raw);
}
/**
 * Remove every occurrence of `searchValue` from this string.
 *
 * @param searchValue literal substring or regular expression to remove. A
 * regular expression without the global flag is treated as if it had one.
 * @returns a copy of this string with all matches removed
 */
String.prototype.removeAll = function(searchValue) {
	// replaceAll throws a TypeError for a non-global RegExp. "Remove all" is
	// unambiguous, so use a copy of the pattern with the `g` flag added
	// (the caller's RegExp object is left untouched).
	const pattern = searchValue instanceof RegExp && !searchValue.global
		? new RegExp(searchValue.source, searchValue.flags + "g")
		: searchValue;
	return this.replaceAll(pattern, "");
}
/**
 * Parse this string as JSON.
 *
 * @param fallback value to return when parsing fails. When omitted (or
 * `undefined`), parse errors are not caught and propagate to the caller.
 * Falsy values such as `null`, `0`, `""` and `false` are valid fallbacks.
 * @returns the parsed value, or `fallback` if one was given and parsing threw
 * @throws whatever `JSON.parse` throws, when no fallback was given
 */
String.prototype.json = function(fallback) {
	const text = this as string;
	// Compare against undefined rather than testing truthiness, so that a
	// falsy fallback (null, 0, "", false) still counts as "specified".
	if (fallback === undefined) return JSON.parse(text);
	try {
		return JSON.parse(text);
	} catch {
		return fallback;
	}
}
/**
 * Build a new string by passing each character of this string through
 * `mapFn` and concatenating the results in order.
 *
 * Characters are obtained by iterating the string, so a surrogate pair is
 * handed to `mapFn` as one character.
 *
 * @param mapFn called with a single argument, the current character
 * @returns the concatenation of all `mapFn` results ("" for an empty string)
 */
String.prototype.map = function(mapFn) {
	return [...this].reduce((built, char) => built + mapFn(char), "");
}
/**
 * Count how many leading characters of `searchString` match this string when
 * compared starting at `position`.
 *
 * @param searchString string whose prefix is compared
 * @param position index in this string where comparison starts (default 0)
 * @returns number of consecutive matching characters; 0 if the first one
 * already differs or `position` lies outside this string
 */
String.prototype.cmpLen = function(searchString, position = 0) {
	let matched = 0;
	// Advance while both strings still have a character at the current offset
	// and those characters agree.
	while (
		matched < searchString.length &&
		matched + position < this.length &&
		this[matched + position] == searchString[matched]
	) {
		matched++;
	}
	return matched;
}
/**
 * Drop `length` characters from the end of this string.
 *
 * @param length number of trailing characters to remove
 * @returns the leading part of this string; "" when `length` is at least the
 * string's length
 */
String.prototype.removeEnd = function(length) {
	// substring clamps an out-of-range end index, so no bounds check is needed
	const keep = this.length - length;
	return this.substring(0, keep);
}
/**
 * Replace the last occurrence of `find` in this string with `replace`.
 *
 * @param find substring to look for
 * @param replace text inserted in place of the last match
 * @returns the string with the last match replaced, or this string unchanged
 * when `find` does not occur
 */
String.prototype.replaceLast = function(find, replace) {
	const at = this.lastIndexOf(find);
	if (at !== -1) {
		// splice `replace` between the text before and after the match
		const head = this.substring(0, at);
		const tail = this.substring(at + find.length);
		return head + replace + tail;
	}
	return this as string; // not found
}
/**
 * Surround this string with the two parts of `wrapper`.
 *
 * @param wrapper value whose element at index 0 is placed before this string
 * and whose element at index 1 is placed after it
 * @returns `wrapper[0]`, this string and `wrapper[1]` concatenated
 */
String.prototype.wrap = function(wrapper) {
	const prefix = wrapper[0];
	const suffix = wrapper[1];
	return prefix + this + suffix;
}
|