aboutsummaryrefslogtreecommitdiff
path: root/util/string.ts
blob: bb6bc0ff8b53ccf0c308f8ea303ff5d60d4742f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import { TokenTags, parseTags } from "../language/tags.ts";

// Ambient typings for the extra String.prototype helpers installed by this
// module. The implementations are assigned to String.prototype further down
// in this file.
declare global {
	interface String {
		/**
		 * @summary get UnicodeRange for character at index 0
		 *
		 * Classification is based on `charCodeAt(0)`, i.e. the first UTF-16
		 * code unit of the string.
		 */
		range(): UnicodeRange;
		/** @summary create a RangeTally object counting, per UnicodeRange, the characters used in the string */
		rangeTally(): RangeTally;

		/** @summary parse concatenated tag string to TokenTags */
		parseTags(): TokenTags;

		/**
		 * @summary Remove all instances of a substring in a string, using a regular expression or search string
		 * @param searchValue  A string or regular expression to search for
		 */
		removeAll(searchValue: string | RegExp): string;

		/**
		 * @summary parse string as JSON, with optional fallback value
		 *
		 * fallback is undefined by default. if fallback is specified, it will be
		 * returned if JSON.parse throws any error. if fallback is not specified,
		 * no errors will be caught.
		 *
		 * @param fallback  return this value if parsing fails
		 */
		json(fallback?: any): any;

		/**
		 * @summary map each character of a string to another character using
		 * `mapFn`
		 *
		 * @param mapFn  called once per character; the returned strings are
		 * concatenated in order to form the result
		 */
		map(mapFn: (char: string) => string): string;

		/**
		 * @summary return length of the match of searchString from position (default: 0)
		 *
		 * Similar to String.prototype.startsWith, but returns the length of the
		 * match instead of a boolean true or false. A partial match counts: the
		 * result is the number of leading characters of searchString that match.
		 *
		 * @param searchString  string to search for
		 * @param position  index to search from (0 by default = start of string)
		 */
		cmpLen(searchString: string, position?: number): number;

		/** @summary remove `length` characters from end of string */
		removeEnd(length: number): string;

		/**
		 * @summary replace last instance of `searchString` with `replaceValue`
		 *
		 * The string is returned unchanged when `searchString` does not occur.
		 */
		replaceLast(searchString: string, replaceValue: string): string;
	}
}

/**
 * Character classes produced by `String.prototype.range()`.
 *
 * Each member's string value is the identifier that `range()` returns at
 * runtime and that keys a `RangeTally`.
 */
export enum UnicodeRange {
	BasicLatin = "latin",
	Whitespace = "any-whitespace", // tab and space
	Punctuation = "any-punctuation", // ASCII `!`, `.` and `?`
	Unknown = "any-unknown", // anything range() does not recognise
	JapaneseFWPunctuation = "jp-full-width-punctuation",
	JapaneseHWPunctuation = "jp-half-width-punctuation",
	JapaneseFWHiragana = "jp-full-width-hiragana",
	JapaneseFWKatakana = "jp-full-width-katakana",
	JapaneseFWLatin = "jp-full-width-latin",
	JapaneseHWKatakana = "jp-half-width-katakana",
	JapaneseKanji = "jp-kanji",
	JapaneseKanjiRadicals = "jp-kanji-radicals",
	JapaneseAuxiliary = "jp-aux",
}

/** Numeric counter per UnicodeRange value; the return type of `String.prototype.rangeTally()`. */
type RangeTally = Record<UnicodeRange, number>;

/**
 * Classify the first UTF-16 code unit of the string into a UnicodeRange.
 *
 * A handful of ASCII characters are singled out as whitespace/punctuation
 * before the generic Basic Latin check; everything that matches no known
 * interval (including an empty string, whose code is NaN) is `Unknown`.
 */
String.prototype.range = function() {
	const unit = this.charCodeAt(0);
	const within = (lo: number, hi: number): boolean => lo <= unit && unit <= hi;

	// ASCII characters that get their own class ahead of BasicLatin
	switch (unit) {
		case 0x09: // tab
		case 0x20: // space
			return UnicodeRange.Whitespace;
		case 0x21: // exclamation mark
		case 0x2e: // full stop
		case 0x3f: // question mark
			return UnicodeRange.Punctuation;
	}

	// intervals taken from https://stackoverflow.com/a/53807563
	// (they do not overlap, so they are grouped per class here)
	if (within(0x0000, 0x007f)) return UnicodeRange.BasicLatin;
	if (within(0x2e80, 0x2fd5)) return UnicodeRange.JapaneseKanjiRadicals;
	if (within(0x3000, 0x303f) || within(0xff5f, 0xff60)) {
		return UnicodeRange.JapaneseFWPunctuation;
	}
	if (within(0x3041, 0x3096)) return UnicodeRange.JapaneseFWHiragana;
	if (within(0x30a1, 0x30ff)) return UnicodeRange.JapaneseFWKatakana;
	if (within(0x31f0, 0x31ff) || within(0x3220, 0x3243) || within(0x3280, 0x337f)) {
		return UnicodeRange.JapaneseAuxiliary;
	}
	if (within(0x3400, 0x4db5) || within(0x4e00, 0x9fcb) || within(0xf900, 0xfa6a)) {
		return UnicodeRange.JapaneseKanji;
	}
	if (within(0xff01, 0xff5e)) return UnicodeRange.JapaneseFWLatin;
	if (within(0xff61, 0xff65)) return UnicodeRange.JapaneseHWPunctuation;
	if (within(0xff66, 0xff9f)) return UnicodeRange.JapaneseHWKatakana;

	return UnicodeRange.Unknown;
}

/**
 * Count how many characters of the string fall into each UnicodeRange.
 *
 * The string is iterated with `for...of` and every character is classified
 * with `range()`.
 *
 * @returns an object with one counter per UnicodeRange value, starting at 0
 */
String.prototype.rangeTally = function() {
	const tally = {} as RangeTally;
	// Seed with the enum *values* ("latin", ...): those are what range()
	// returns. Object.keys() would give the member names ("BasicLatin", ...),
	// leaving the real counters undefined so that `++` produces NaN.
	for (const range of Object.values(UnicodeRange)) tally[range] = 0;
	for (const char of this) tally[char.range()]++;
	return tally;
};

/** Hand this string to the imported `parseTags` helper and return its result. */
String.prototype.parseTags = function() {
	const source = this as string;
	return parseTags(source);
}

/**
 * Remove every occurrence of `searchValue` from the string.
 *
 * @param searchValue  literal string or regular expression to strip out; a
 * regular expression without the `g` flag is treated as if it were global
 * @returns the string with all matches deleted
 */
String.prototype.removeAll = function(searchValue) {
	// replaceAll() throws a TypeError for a RegExp lacking the global flag.
	// "Remove all" is unambiguous, so add the flag on a copy instead of
	// failing (the caller's RegExp object is left untouched).
	if (searchValue instanceof RegExp && !searchValue.global) {
		searchValue = new RegExp(searchValue.source, searchValue.flags + "g");
	}
	return this.replaceAll(searchValue, "");
}

/**
 * Parse the string as JSON.
 *
 * @param fallback  value to return when `JSON.parse` throws. When omitted
 * (or `undefined`) the parse error propagates to the caller.
 * @returns the parsed value, or `fallback` if parsing failed and one was given
 * @throws whatever `JSON.parse` throws, only when no fallback was supplied
 */
String.prototype.json = function(fallback) {
	// Compare against undefined rather than testing truthiness so that falsy
	// fallbacks (null, 0, "", false) are honoured too.
	if (fallback === undefined) return JSON.parse(this as string);

	try {
		return JSON.parse(this as string);
	} catch {
		return fallback;
	}
}

/**
 * Build a new string by passing each character (as produced by string
 * iteration) through `mapFn` and concatenating the results in order.
 */
String.prototype.map = function(mapFn) {
	return [...this].reduce((acc, ch) => acc + mapFn(ch), "");
}

/**
 * Count how many leading characters of `searchString` match this string
 * starting at `position`; counting stops at the first mismatch or when
 * either string runs out.
 */
String.prototype.cmpLen = function(searchString, position = 0) {
	let matched = 0;
	while (
		matched < searchString.length &&
		position + matched < this.length &&
		this[position + matched] == searchString[matched]
	) {
		matched++;
	}
	return matched;
}

/**
 * Drop the last `length` characters of the string.
 *
 * Asking for more characters than the string has yields ""; a negative
 * `length` leaves the string untouched.
 */
String.prototype.removeEnd = function(length) {
	// clamp at 0 so slice() never sees a negative (from-the-end) index
	const keep = Math.max(0, this.length - length);
	return this.slice(0, keep);
}

/**
 * Swap the final occurrence of `find` for `replace`; when `find` does not
 * occur the string is returned as-is.
 */
String.prototype.replaceLast = function(find, replace) {
	const at = this.lastIndexOf(find);
	if (at == -1) return this as string; // nothing to replace

	const head = this.slice(0, at);
	const tail = this.slice(at + find.length);
	return head + replace + tail;
}