From 8ff39cbe6300ca479584fe7d85ff03a1f65bc9b0 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Fri, 30 Jun 2023 23:23:47 +0200 Subject: WIP move more stuff around + more broken examples --- api/sentence-word.ts | 10 +++ api/sentence.ts | 3 +- api/word.ts | 4 +- examples/furigana-html.ts | 19 +++++ examples/reading-correction-break.ts | 21 +++++ examples/readme.md | 1 + examples/sentence-word-lookup.ts | 2 +- language/japanese.ts | 137 --------------------------------- util/japanese.ts | 145 +++++++++++++++++++++++++++++++++++ util/string.ts | 9 --- 10 files changed, 201 insertions(+), 150 deletions(-) create mode 100644 api/sentence-word.ts create mode 100644 examples/furigana-html.ts create mode 100644 examples/reading-correction-break.ts delete mode 100644 language/japanese.ts create mode 100644 util/japanese.ts diff --git a/api/sentence-word.ts b/api/sentence-word.ts new file mode 100644 index 0000000..a5bb9ca --- /dev/null +++ b/api/sentence-word.ts @@ -0,0 +1,10 @@ +import Word from "./word.ts"; + +export default class SentenceWord extends Word { + public indexStart: number = 0; + public indexEnd: number = 0; + + constructor() { + super(); + } +} diff --git a/api/sentence.ts b/api/sentence.ts index dc14cb2..6f67844 100644 --- a/api/sentence.ts +++ b/api/sentence.ts @@ -1,9 +1,10 @@ import APIBase from "./base.ts"; +import SentenceWord from "./sentence-word.ts"; import Word from "./word.ts"; import Yomikun from "./yomikun.ts"; export default class Sentence extends APIBase { - public words: Array = []; + public words: Array = []; constructor() { super(); diff --git a/api/word.ts b/api/word.ts index d2df69e..3e8fce6 100644 --- a/api/word.ts +++ b/api/word.ts @@ -2,8 +2,8 @@ import Glossary from "./glossary.ts"; import APIBase from "./base.ts"; export default class Word extends APIBase { - public writing: string = "TODO"; - public reading: string = "TODO"; + public writing = "TODO"; + public reading = "TODO"; constructor() { super(); diff --git 
a/examples/furigana-html.ts b/examples/furigana-html.ts new file mode 100644 index 0000000..5f59b4f --- /dev/null +++ b/examples/furigana-html.ts @@ -0,0 +1,19 @@ +import Yomikun from "../api/yomikun.ts"; +import DirectCoreClient from "../core/direct/client.ts"; + +// Create a direct (local) API instance +var api = new Yomikun(new DirectCoreClient()); + +// Explicitly wait until everything is ready +// await api.ready; + +// This sentence does not contain all information until it is explicitly +// fetched by the user. Each subclass instantiated from an API instance keeps a +// reference to that API instance for fetching additional data. +var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。"); + +// Copy the sentence verbatim but add furigana to each word's kanji +var furigana = sentence.furigana({ format: "HTML" }); + +console.log(furigana); + diff --git a/examples/reading-correction-break.ts b/examples/reading-correction-break.ts new file mode 100644 index 0000000..3f0359b --- /dev/null +++ b/examples/reading-correction-break.ts @@ -0,0 +1,21 @@ +import Yomikun from "../api/yomikun.ts"; +import DirectCoreClient from "../core/direct/client.ts"; + +// Create a direct (local) API instance +var api = new Yomikun(new DirectCoreClient()); + +// Explicitly wait until everything is ready +// await api.ready; + +// index sentence (generates wrong readings) +var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。"); + +// generated reading (wrong) +console.log(sentence.furigana()); + +// insert parser break +sentence.break(sentence.at("漢字")); + +// generated reading (correct) +console.log(sentence.furigana()); + diff --git a/examples/readme.md b/examples/readme.md index 9eb8760..bc7fa9d 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -14,4 +14,5 @@ Examples (checked = working): - [ ] Login as root and import a dictionary from a local file - [ ] Series-specific search with a lot of jargon - [ ] Lookup kanji details of a word +- [ ] Choose alternate 
kanji reading for word diff --git a/examples/sentence-word-lookup.ts b/examples/sentence-word-lookup.ts index ff82853..6fd57bd 100644 --- a/examples/sentence-word-lookup.ts +++ b/examples/sentence-word-lookup.ts @@ -1,5 +1,6 @@ import Yomikun from "../api/yomikun.ts"; import DirectCoreClient from "../core/direct/client.ts"; +// import "../util/string.ts"; // Create a direct (local) API instance var api = new Yomikun(new DirectCoreClient()); @@ -11,7 +12,6 @@ var api = new Yomikun(new DirectCoreClient()); // fetched by the user. Each subclass instantiated from an API instance keeps a // reference to that API instance for fetching additional data. var sentence = await api.sentence("この紅茶は甘すぎる"); -console.log(await sentence.test()); // Pick the word 紅茶 from the sentence in some different ways: var word = sentence.words.find(w => w.writing == "紅茶"); // filter terms by writing (matches first only) diff --git a/language/japanese.ts b/language/japanese.ts deleted file mode 100644 index c0ad825..0000000 --- a/language/japanese.ts +++ /dev/null @@ -1,137 +0,0 @@ -import { UnicodeRange } from "../util/string.ts"; - -enum StringOnlyReturnValue { - TallyAdd, - TallyIgnore, - TallyStop, -} - -export default class JapaneseString extends String { - /** @summary check tally for allowed scripts (internal use only) */ - private stringOnly(check: (key: string, val: number) => StringOnlyReturnValue): boolean { - var tally = this.rangeTally(); - var ok = false; - for (var [key, val] of Object.entries(tally)) { - switch(check(key, val)) { - case StringOnlyReturnValue.TallyAdd: { - ok = true; - break; - } - case StringOnlyReturnValue.TallyIgnore: { break; } - case StringOnlyReturnValue.TallyStop: { return false; } - } - } - return ok; - } - - /** - * @summary check if string is hiragana only - * - * @argument strict don't allow ascii whitespace and punctuation (default: false) - * - * return `true` if at least one hiragana character is in string, and no other - * unicode ranges are found. 
ascii whitespace and punctuation is still allowed, - * but not counted as hiragana. this behavior can be turned off by setting - * `strict` to true - */ - hiraganaOnly(strict = false) { - return this.stringOnly((key, val) => { - if (key == UnicodeRange.JapaneseHiragana) - return StringOnlyReturnValue.TallyAdd; // count hiragana characters - else if (!strict && key.startsWith("any-")) - return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) - else if (val > 0) - return StringOnlyReturnValue.TallyStop; // don't allow any other ranges - return StringOnlyReturnValue.TallyIgnore; - }); - } - - /** - * @summary check if string is katakana only - * - * @argument strict don't allow ascii whitespace and punctuation (default: false) - * - * return `true` if at least one katakana character is in string, and no other - * unicode ranges are found. ascii whitespace and punctuation is still allowed, - * but not counted as katakana. this behavior can be turned off by setting - * `strict` to true - */ - katakanaOnly(strict = false) { - return this.stringOnly((key, val) => { - if (key == UnicodeRange.JapaneseKatakana) - return StringOnlyReturnValue.TallyAdd; // count katakana characters - else if (!strict && key.startsWith("any-")) - return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) - else if (val > 0) - return StringOnlyReturnValue.TallyStop; // don't allow any other ranges - return StringOnlyReturnValue.TallyIgnore; - }); - } - - /** - * @summary check if string is kanji only - * - * @argument strict don't allow ascii whitespace and punctuation (default: false) - * - * return `true` if at least one kanji character is in string, and no other - * unicode ranges are found. ascii whitespace and punctuation is still allowed, - * but not counted as kanji. 
this behavior can be turned off by setting - * `strict` to true - */ - kanjiOnly(strict = false) { - return this.stringOnly((key, val) => { - if (key == UnicodeRange.JapaneseKanji) - return StringOnlyReturnValue.TallyAdd; // count kanji characters - else if (!strict && key.startsWith("any-")) - return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) - else if (val > 0) - return StringOnlyReturnValue.TallyStop; // don't allow any other ranges - return StringOnlyReturnValue.TallyIgnore; - }); - } - - /** - * @summary check if string is kana only - * - * @argument strict don't allow ascii whitespace and punctuation (default: false) - * - * return `true` if at least one kana character is in string, and no other - * unicode ranges are found. ascii whitespace and punctuation is still allowed, - * but not counted as kana. this behavior can be turned off by setting `strict` - * to true - */ - kanaOnly(strict = false) { - return this.stringOnly((key, val) => { - if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana) - return StringOnlyReturnValue.TallyAdd; // count kana characters - else if (!strict && key.startsWith("any-")) - return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) - else if (val > 0) - return StringOnlyReturnValue.TallyStop; // don't allow any other ranges - return StringOnlyReturnValue.TallyIgnore; - }); - } - - /** - * @summary check if string is japanese only - * - * @argument strict don't allow ascii whitespace and punctuation (default: false) - * - * return `true` if at least one japanese character is in string, and no other - * unicode ranges are found. ascii whitespace and punctuation is still allowed, - * but not counted as japanese. 
this behavior can be turned off by setting - * `strict` to true - */ - japaneseOnly(strict = false) { - return this.stringOnly((key, val) => { - if (key.startsWith("jp-")) - return StringOnlyReturnValue.TallyAdd; // count japanese characters - else if (!strict && key.startsWith("any-")) - return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) - else if (val > 0) - return StringOnlyReturnValue.TallyStop; // don't allow any other ranges - return StringOnlyReturnValue.TallyIgnore; - }); - } -} - diff --git a/util/japanese.ts b/util/japanese.ts new file mode 100644 index 0000000..2017280 --- /dev/null +++ b/util/japanese.ts @@ -0,0 +1,145 @@ +import { UnicodeRange } from "./string.ts"; + +declare global { + interface String { + /** + * @summary check if string is hiragana only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one hiragana character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as hiragana. this behavior can be turned off by setting + * `strict` to true + */ + hiraganaOnly(strict?: boolean): boolean + /** + * @summary check if string is katakana only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one katakana character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as katakana. this behavior can be turned off by setting + * `strict` to true + */ + katakanaOnly(strict?: boolean): boolean + /** + * @summary check if string is kanji only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one kanji character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as kanji. 
this behavior can be turned off by setting + * `strict` to true + */ + kanjiOnly(strict?: boolean): boolean + /** + * @summary check if string is kana only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one kana character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as kana. this behavior can be turned off by setting `strict` + * to true + */ + kanaOnly(strict?: boolean): boolean + /** + * @summary check if string is japanese only + * + * @argument strict don't allow ascii whitespace and punctuation (default: false) + * + * return `true` if at least one japanese character is in string, and no other + * unicode ranges are found. ascii whitespace and punctuation is still allowed, + * but not counted as japanese. this behavior can be turned off by setting + * `strict` to true + */ + japaneseOnly(strict?: boolean): boolean + } +} + +enum StringOnlyReturnValue { + TallyAdd, + TallyIgnore, + TallyStop, +} + +/** @summary check tally for allowed scripts (internal use only) */ +function stringOnly(input: string, check: (key: string, val: number) => StringOnlyReturnValue): boolean { + var tally = input.rangeTally(); + var ok = false; + for (var [key, val] of Object.entries(tally)) { + switch(check(key, val)) { + case StringOnlyReturnValue.TallyAdd: { + ok = true; + break; + } + case StringOnlyReturnValue.TallyIgnore: { break; } + case StringOnlyReturnValue.TallyStop: { return false; } + } + } + return ok; +} + +String.prototype.hiraganaOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseHiragana) + return StringOnlyReturnValue.TallyAdd; // count hiragana characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return 
StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.katakanaOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseKatakana) + return StringOnlyReturnValue.TallyAdd; // count katakana characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.kanjiOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseKanji) + return StringOnlyReturnValue.TallyAdd; // count kanji characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.kanaOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key == UnicodeRange.JapaneseHiragana || key == UnicodeRange.JapaneseKatakana) + return StringOnlyReturnValue.TallyAdd; // count kana characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- (ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + +String.prototype.japaneseOnly = function(strict = false) { + return stringOnly(this as string, (key, val) => { + if (key.startsWith("jp-")) + return StringOnlyReturnValue.TallyAdd; // count japanese characters + else if (!strict && key.startsWith("any-")) + return StringOnlyReturnValue.TallyIgnore; // allow any- 
(ascii whitespace and punctuation) + else if (val > 0) + return StringOnlyReturnValue.TallyStop; // don't allow any other ranges + return StringOnlyReturnValue.TallyIgnore; + }); +} + diff --git a/util/string.ts b/util/string.ts index 16d8f0a..397dcd6 100644 --- a/util/string.ts +++ b/util/string.ts @@ -1,17 +1,12 @@ import { TokenTags, parseTags } from "../language/tags.ts"; -import JapaneseString from "../language/japanese.ts"; declare global { - /** @summary extended String prototype functions */ interface String { /** @summary get UnicodeRange for character at index 0 */ range(): UnicodeRange; /** @summary create a RangeTally object for counting used unicode ranges in string */ rangeTally(): RangeTally; - /** @summary get JapaneseString from this string */ - jp(): JapaneseString; - /** @summary parse concatenated tag string to TokenTags */ parseTags(): TokenTags; @@ -72,10 +67,6 @@ String.prototype.rangeTally = function() { return tally; }; -String.prototype.jp = function() { - return new JapaneseString(this); -} - String.prototype.parseTags = function() { return parseTags(this as string); } -- cgit v1.2.3