aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-06-30 23:23:47 +0200
committerlonkaars <loek@pipeframe.xyz>2023-06-30 23:23:47 +0200
commit8ff39cbe6300ca479584fe7d85ff03a1f65bc9b0 (patch)
treec354294dbbeb6c86d5909955762326272664f07b
parent722127ef4059020876f708b1d5406c04fd07b0da (diff)
WIP move more stuff around + more broken examples
-rw-r--r--api/sentence-word.ts10
-rw-r--r--api/sentence.ts3
-rw-r--r--api/word.ts4
-rw-r--r--examples/furigana-html.ts19
-rw-r--r--examples/reading-correction-break.ts21
-rw-r--r--examples/readme.md1
-rw-r--r--examples/sentence-word-lookup.ts2
-rw-r--r--language/japanese.ts137
-rw-r--r--util/japanese.ts145
-rw-r--r--util/string.ts9
10 files changed, 201 insertions, 150 deletions
diff --git a/api/sentence-word.ts b/api/sentence-word.ts
new file mode 100644
index 0000000..a5bb9ca
--- /dev/null
+++ b/api/sentence-word.ts
@@ -0,0 +1,10 @@
+import Word from "./word.ts";
+
+export default class SentenceWord extends Word {
+ public indexStart: number = 0;
+ public indexEnd: number = 0;
+
+ constructor() {
+ super();
+ }
+}
diff --git a/api/sentence.ts b/api/sentence.ts
index dc14cb2..6f67844 100644
--- a/api/sentence.ts
+++ b/api/sentence.ts
@@ -1,9 +1,10 @@
import APIBase from "./base.ts";
+import SentenceWord from "./sentence-word.ts";
import Word from "./word.ts";
import Yomikun from "./yomikun.ts";
export default class Sentence extends APIBase {
- public words: Array<Word> = [];
+ public words: Array<SentenceWord> = [];
constructor() {
super();
diff --git a/api/word.ts b/api/word.ts
index d2df69e..3e8fce6 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -2,8 +2,8 @@ import Glossary from "./glossary.ts";
import APIBase from "./base.ts";
export default class Word extends APIBase {
- public writing: string = "TODO";
- public reading: string = "TODO";
+ public writing = "TODO";
+ public reading = "TODO";
constructor() {
super();
diff --git a/examples/furigana-html.ts b/examples/furigana-html.ts
new file mode 100644
index 0000000..5f59b4f
--- /dev/null
+++ b/examples/furigana-html.ts
@@ -0,0 +1,19 @@
+import Yomikun from "../api/yomikun.ts";
+import DirectCoreClient from "../core/direct/client.ts";
+
+// Create a direct (local) API instance
+var api = new Yomikun(new DirectCoreClient());
+
+// Explicitly wait until everything is ready
+// await api.ready;
+
+// This sentence does not contain all information until it is explicitly
+// fetched by the user. Each subclass instantiated from an API instance keeps a
+// reference to that API instance for fetching additional data.
+var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。");
+
+// Copy the sentence verbatim but add furigana to each word's kanji
+var furigana = sentence.furigana({ format: "HTML" });
+
+console.log(furigana);
+
diff --git a/examples/reading-correction-break.ts b/examples/reading-correction-break.ts
new file mode 100644
index 0000000..3f0359b
--- /dev/null
+++ b/examples/reading-correction-break.ts
@@ -0,0 +1,21 @@
+import Yomikun from "../api/yomikun.ts";
+import DirectCoreClient from "../core/direct/client.ts";
+
+// Create a direct (local) API instance
+var api = new Yomikun(new DirectCoreClient());
+
+// Explicitly wait until everything is ready
+// await api.ready;
+
+// index sentence (generates wrong readings)
+var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。");
+
+// generated reading (wrong)
+console.log(sentence.furigana());
+
+// insert parser break
+sentence.break(sentence.at("漢字"));
+
+// generated reading (correct)
+console.log(sentence.furigana());
+
diff --git a/examples/readme.md b/examples/readme.md
index 9eb8760..bc7fa9d 100644
--- a/examples/readme.md
+++ b/examples/readme.md
@@ -14,4 +14,5 @@ Examples (checked = working):
- [ ] Login as root and import a dictionary from a local file
- [ ] Series-specific search with a lot of jargon
- [ ] Lookup kanji details of a word
+- [ ] Choose alternate kanji reading for word
diff --git a/examples/sentence-word-lookup.ts b/examples/sentence-word-lookup.ts
index ff82853..6fd57bd 100644
--- a/examples/sentence-word-lookup.ts
+++ b/examples/sentence-word-lookup.ts
@@ -1,5 +1,6 @@
import Yomikun from "../api/yomikun.ts";
import DirectCoreClient from "../core/direct/client.ts";
+// import "../util/string.ts";
// Create a direct (local) API instance
var api = new Yomikun(new DirectCoreClient());
@@ -11,7 +12,6 @@ var api = new Yomikun(new DirectCoreClient());
// fetched by the user. Each subclass instantiated from an API instance keeps a
// reference to that API instance for fetching additional data.
var sentence = await api.sentence("この紅茶は甘すぎる");
-console.log(await sentence.test());
// Pick the word 紅茶 from the sentence in some different ways:
var word = sentence.words.find(w => w.writing == "紅茶"); // filter terms by writing (matches first only)
diff --git a/language/japanese.ts b/language/japanese.ts
deleted file mode 100644
index c0ad825..0000000
--- a/language/japanese.ts
+++ /dev/null
@@ -1,137 +0,0 @@
-import { UnicodeRange } from "../util/string.ts";
-
-enum StringOnlyReturnValue {
- TallyAdd,
- TallyIgnore,
- TallyStop,
-}
-
-export default class JapaneseString extends String {
- /** @summary check tally for allowed scripts (internal use only) */
- private stringOnly(check: (key: string, val: number) => StringOnlyReturnValue): boolean {
- var tally = this.rangeTally();
- var ok = false;
- for (var [key, val] of Object.entries(tally)) {
- switch(check(key, val)) {
- case StringOnlyReturnValue.TallyAdd: {
- ok = true;
- break;
- }
- case StringOnlyReturnValue.TallyIgnore: { break; }
- case StringOnlyReturnValue.TallyStop: { return false; }
- }
- }
- return ok;
- }
-
- /**
- * @summary check if string is hiragana only
- *
- * @argument strict don't allow ascii whitespace and punctuation (default: false)
- *
- * return `true` if at least one hiragana character is in string, and no other
- * unicode ranges are found. ascii whitespace and punctuation is still allowed,
- * but not counted as hiragana. this behavior can be turned off by setting
- * `strict` to true
- */
- hiraganaOnly(strict = false) {
- return this.stringOnly((key, val) => {
- if (key == UnicodeRange.JapaneseHiragana)
- return StringOnlyReturnValue.TallyAdd; // count hiragana characters
- else if (!strict && key.startsWith("any-"))
- return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
- else if (val > 0)
- return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
- return StringOnlyReturnValue.TallyIgnore;
- });
- }
-
- /**
- * @summary check if string is katakana only
- *
- * @argument strict don't allow ascii whitespace and punctuation (default: false)
- *
- * return `true` if at least one katakana character is in string, and no other
- * unicode ranges are found. ascii whitespace and punctuation is still allowed,
- * but not counted as katakana. this behavior can be turned off by setting
- * `strict` to true
- */
- katakanaOnly(strict = false) {
- return this.stringOnly((key, val) => {
- if (key == UnicodeRange.JapaneseKatakana)
- return StringOnlyReturnValue.TallyAdd; // count katakana characters
- else if (!strict && key.startsWith("any-"))
- return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
- else if (val > 0)
- return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
- return StringOnlyReturnValue.TallyIgnore;
- });
- }
-
- /**
- * @summary check if string is kanji only
- *
- * @argument strict don't allow ascii whitespace and punctuation (default: false)
- *
- * return `true` if at least one kanji character is in string, and no other
- * unicode ranges are found. ascii whitespace and punctuation is still allowed,
- * but not counted as kanji. this behavior can be turned off by setting
- * `strict` to true
- */
- kanjiOnly(strict = false) {
- return this.stringOnly((key, val) => {
- if (key == UnicodeRange.JapaneseKanji)
- return StringOnlyReturnValue.TallyAdd; // count kanji characters
- else if (!strict && key.startsWith("any-"))
- return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
- else if (val > 0)
- return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
- return StringOnlyReturnValue.TallyIgnore;
- });
- }
-
- /**
- * @summary check if string is kana only
- *
- * @argument strict don't allow ascii whitespace and punctuation (default: false)
- *
- * return `true` if at least one kana character is in string, and no other
- * unicode ranges are found. ascii whitespace and punctuation is still allowed,
- * but not counted as kana. this behavior can be turned off by setting `strict`
- * to true
- */
- kanaOnly(strict = false) {
- return this.stringOnly((key, val) => {
- if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana)
- return StringOnlyReturnValue.TallyAdd; // count kana characters
- else if (!strict && key.startsWith("any-"))
- return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
- else if (val > 0)
- return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
- return StringOnlyReturnValue.TallyIgnore;
- });
- }
-
- /**
- * @summary check if string is japanese only
- *
- * @argument strict don't allow ascii whitespace and punctuation (default: false)
- *
- * return `true` if at least one japanese character is in string, and no other
- * unicode ranges are found. ascii whitespace and punctuation is still allowed,
- * but not counted as japanese. this behavior can be turned off by setting
- * `strict` to true
- */
- japaneseOnly(strict = false) {
- return this.stringOnly((key, val) => {
- if (key.startsWith("jp-"))
- return StringOnlyReturnValue.TallyAdd; // count japanese characters
- else if (!strict && key.startsWith("any-"))
- return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
- else if (val > 0)
- return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
- return StringOnlyReturnValue.TallyIgnore;
- });
- }
-}
-
diff --git a/util/japanese.ts b/util/japanese.ts
new file mode 100644
index 0000000..2017280
--- /dev/null
+++ b/util/japanese.ts
@@ -0,0 +1,145 @@
+import { UnicodeRange } from "./string.ts";
+
+declare global {
+ interface String {
+ /**
+ * @summary check if string is hiragana only
+ *
+ * @argument strict don't allow ascii whitespace and punctuation (default: false)
+ *
+ * return `true` if at least one hiragana character is in string, and no other
+ * unicode ranges are found. ascii whitespace and punctuation is still allowed,
+ * but not counted as hiragana. this behavior can be turned off by setting
+ * `strict` to true
+ */
+ hiraganaOnly(strict?: boolean): boolean
+ /**
+ * @summary check if string is katakana only
+ *
+ * @argument strict don't allow ascii whitespace and punctuation (default: false)
+ *
+ * return `true` if at least one katakana character is in string, and no other
+ * unicode ranges are found. ascii whitespace and punctuation is still allowed,
+ * but not counted as katakana. this behavior can be turned off by setting
+ * `strict` to true
+ */
+ katakanaOnly(strict?: boolean): boolean
+ /**
+ * @summary check if string is kanji only
+ *
+ * @argument strict don't allow ascii whitespace and punctuation (default: false)
+ *
+ * return `true` if at least one kanji character is in string, and no other
+ * unicode ranges are found. ascii whitespace and punctuation is still allowed,
+ * but not counted as kanji. this behavior can be turned off by setting
+ * `strict` to true
+ */
+ kanjiOnly(strict?: boolean): boolean
+ /**
+ * @summary check if string is kana only
+ *
+ * @argument strict don't allow ascii whitespace and punctuation (default: false)
+ *
+ * return `true` if at least one kana character is in string, and no other
+ * unicode ranges are found. ascii whitespace and punctuation is still allowed,
+ * but not counted as kana. this behavior can be turned off by setting `strict`
+ * to true
+ */
+ kanaOnly(strict?: boolean): boolean
+ /**
+ * @summary check if string is japanese only
+ *
+ * @argument strict don't allow ascii whitespace and punctuation (default: false)
+ *
+ * return `true` if at least one japanese character is in string, and no other
+ * unicode ranges are found. ascii whitespace and punctuation is still allowed,
+ * but not counted as japanese. this behavior can be turned off by setting
+ * `strict` to true
+ */
+ japaneseOnly(strict?: boolean): boolean
+ }
+}
+
+enum StringOnlyReturnValue {
+ TallyAdd,
+ TallyIgnore,
+ TallyStop,
+}
+
+/** @summary check tally for allowed scripts (internal use only) */
+function stringOnly(input: string, check: (key: string, val: number) => StringOnlyReturnValue): boolean {
+ var tally = input.rangeTally();
+ var ok = false;
+ for (var [key, val] of Object.entries(tally)) {
+ switch(check(key, val)) {
+ case StringOnlyReturnValue.TallyAdd: {
+ ok = true;
+ break;
+ }
+ case StringOnlyReturnValue.TallyIgnore: { break; }
+ case StringOnlyReturnValue.TallyStop: { return false; }
+ }
+ }
+ return ok;
+}
+
+String.prototype.hiraganaOnly = function(strict = false) {
+ return stringOnly(this as string, (key, val) => {
+ if (key == UnicodeRange.JapaneseHiragana)
+ return StringOnlyReturnValue.TallyAdd; // count hiragana characters
+ else if (!strict && key.startsWith("any-"))
+ return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
+ else if (val > 0)
+ return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
+ return StringOnlyReturnValue.TallyIgnore;
+ });
+}
+
+String.prototype.katakanaOnly = function(strict = false) {
+ return stringOnly(this as string, (key, val) => {
+ if (key == UnicodeRange.JapaneseKatakana)
+ return StringOnlyReturnValue.TallyAdd; // count katakana characters
+ else if (!strict && key.startsWith("any-"))
+ return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
+ else if (val > 0)
+ return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
+ return StringOnlyReturnValue.TallyIgnore;
+ });
+}
+
+String.prototype.kanjiOnly = function(strict = false) {
+ return stringOnly(this as string, (key, val) => {
+ if (key == UnicodeRange.JapaneseKanji)
+ return StringOnlyReturnValue.TallyAdd; // count kanji characters
+ else if (!strict && key.startsWith("any-"))
+ return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
+ else if (val > 0)
+ return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
+ return StringOnlyReturnValue.TallyIgnore;
+ });
+}
+
+String.prototype.kanaOnly = function(strict = false) {
+ return stringOnly(this as string, (key, val) => {
+ if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana)
+ return StringOnlyReturnValue.TallyAdd; // count kana characters
+ else if (!strict && key.startsWith("any-"))
+ return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
+ else if (val > 0)
+ return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
+ return StringOnlyReturnValue.TallyIgnore;
+ });
+}
+
+String.prototype.japaneseOnly = function(strict = false) {
+ return stringOnly(this as string, (key, val) => {
+ if (key.startsWith("jp-"))
+ return StringOnlyReturnValue.TallyAdd; // count japanese characters
+ else if (!strict && key.startsWith("any-"))
+ return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation)
+ else if (val > 0)
+ return StringOnlyReturnValue.TallyStop; // don't allow any other ranges
+ return StringOnlyReturnValue.TallyIgnore;
+ });
+}
+
diff --git a/util/string.ts b/util/string.ts
index 16d8f0a..397dcd6 100644
--- a/util/string.ts
+++ b/util/string.ts
@@ -1,17 +1,12 @@
import { TokenTags, parseTags } from "../language/tags.ts";
-import JapaneseString from "../language/japanese.ts";
declare global {
- /** @summary extended String prototype functions */
interface String {
/** @summary get UnicodeRange for character at index 0 */
range(): UnicodeRange;
/** @summary create a RangeTally object for counting used unicode ranges in string */
rangeTally(): RangeTally;
- /** @summary get JapaneseString from this string */
- jp(): JapaneseString;
-
/** @summary parse concatenated tag string to TokenTags */
parseTags(): TokenTags;
@@ -72,10 +67,6 @@ String.prototype.rangeTally = function() {
return tally;
};
-String.prototype.jp = function() {
- return new JapaneseString(this);
-}
-
String.prototype.parseTags = function() {
return parseTags(this as string);
}