From 413501fde6bac909f31ad399781626caa16c8d04 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Wed, 12 Jul 2023 00:31:12 +0200 Subject: implement parser breaks --- api/sentence.ts | 47 ++++++++++++++++++++++++++++++------ core/raw/api.ts | 5 +++- examples/reading-correction-break.ts | 12 ++++----- examples/readme.md | 4 +-- search/search.ts | 13 +++++++--- util/number.ts | 12 +++++++++ 6 files changed, 73 insertions(+), 20 deletions(-) diff --git a/api/sentence.ts b/api/sentence.ts index 1d22be3..6b1a1e4 100644 --- a/api/sentence.ts +++ b/api/sentence.ts @@ -7,25 +7,24 @@ export default class Sentence extends APIBase { public words: Array = []; protected query?: SearchSentenceResult; protected original: string = ""; + protected breaks: Array = []; + protected frozen = false; public ready: Promise; private _resolveReady: () => void = () => {}; constructor(input: string) { super(); - this.ready = new Promise(res => this._resolveReady = res); - this.fetch(input); + this.original = input; + this.update(); } first(searchValue: RegExp | string): Word | undefined { - return this.words[0]; + return this.words[0]; // TODO: implement } - private async fetch(input: string) { - this.original = input; - this.query = await (await this.api)["core"].search.sentence(input); - await this.updateWords(); - this._resolveReady(); + private async fetch() { + this.query = await (await this.api)["core"].search.sentence(this.original, { breaks: this.breaks }); } private async updateWords() { @@ -53,4 +52,36 @@ export default class Sentence extends APIBase { return out + word.furigana(format); }, ""); } + + public async update() { + if (this.frozen) return; + // unresolve ready + this.ready = new Promise(res => this._resolveReady = res); + + // fetch sentence from DB + await this.fetch(); + // parse words into Word + await this.updateWords(); + + // mark ready again + this._resolveReady(); + } + + public at(term: string) { + return this.original.indexOf(term); + } + + public async break(location: number) { + this.breaks.push(location); + await this.update(); + } + + public async freeze() { + this.frozen = true; + } + + public async unfreeze() { + this.frozen = false; + await this.update(); + } } diff --git a/core/raw/api.ts b/core/raw/api.ts index 6046a26..76cdb5b 100644 --- a/core/raw/api.ts +++ b/core/raw/api.ts @@ -1,19 +1,22 @@ import Core, { CoreExport, CoreImport, CoreSearch, CoreUser } from "../api.ts"; import YomikunError from "../../util/error.ts"; import Search from "../../search/search.ts"; +import DB from "../../db/db.ts"; /** @summary internal Core (DO NOT USE DIRECTLY) */ export default class RawCore implements Core { public ready: Promise; private _search: Search; + private _db: DB; constructor() { if (this.constructor === RawCore) { throw new YomikunError("RawCore instantiated! Use DirectCoreClient instead!"); } - this._search = new Search(); + this._db = new DB(); + this._search = new Search(this._db); this.ready = new Promise(async resolve => { await this._search.ready; diff --git a/examples/reading-correction-break.ts b/examples/reading-correction-break.ts index 6761165..a72e545 100644 --- a/examples/reading-correction-break.ts +++ b/examples/reading-correction-break.ts @@ -1,23 +1,23 @@ import Yomikun from "../api/yomikun.ts"; import DirectCoreClient from "../core/direct/client.ts"; -// WIP - // Create a direct (local) API instance var api = new Yomikun(new DirectCoreClient()); // Explicitly wait until everything is ready (not required) await api.ready; -// index sentence (generates wrong readings) -var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。"); +// index sentence (generates wrong reading for 「この辺に」) +var sentence = await api.sentence("やっぱりこの辺にある武器も朽ちてるみたいだし"); // generated reading (wrong) console.log(sentence.furigana()); -// insert parser break -sentence.break(sentence.at("漢字")); +// insert parser break in the middle of a (wrong) expression +await sentence.break(sentence.at("この辺") + 2); // generated reading (correct) console.log(sentence.furigana()); +// TODO: this is a bad example, find an example that uses adjacent kanji that +// can become a larger compound, but should be two separate words. diff --git a/examples/readme.md b/examples/readme.md index bc7fa9d..707a034 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -8,8 +8,8 @@ using the Yomikun API.~ Examples (checked = working): - [ ] Lookup a word in a sentence -- [ ] Get furigana in HTML for a sentence -- [ ] Correct the reading of a word (because of ambiguous word boundries) by inserting a break +- [x] Get furigana in HTML for a sentence +- [x] Correct the reading of a word (because of ambiguous word boundries) by inserting a break - [ ] Login as a regular user and ignore an expression - [ ] Login as root and import a dictionary from a local file - [ ] Series-specific search with a lot of jargon diff --git a/search/search.ts b/search/search.ts index 0a50773..57bd0e2 100644 --- a/search/search.ts +++ b/search/search.ts @@ -4,15 +4,18 @@ import DB from "../db/db.ts"; import "../util/array.ts"; import "../util/set.ts"; import { DeepPartial } from "../util/types.ts"; +import { min } from "../util/number.ts"; /** @summary main Search class */ export default class Search { db: DB; ready: Promise; - constructor() { - this.db = new DB(); + /** @constant guaranteed lookahead delimiters */ + private SCAN_DELIMITERS = [" ", " ", "。", "、", "「", "」"]; + constructor(db: DB) { + this.db = db; this.ready = new Promise(async resolve => { await this.db.ready; resolve(); @@ -89,7 +92,11 @@ export default class Search { }; for (let start = 0; start < sentence.length; start++) { - var lookahead = props.lookahead; // TODO: stop at next delimiter (optimization) + var lookahead = min( + props.lookahead, + (props.breaks.filter(b => b > start)[0] - start) ?? props.lookahead, // lookahead as fallback instead of NaN + // sentence.first(this.SCAN_DELIMITERS, start), + ); var term = sentence.substring(start, start + lookahead); var results = (await this.terms(term)).map(term => { var word = term as SearchWord; diff --git a/util/number.ts b/util/number.ts index c28864f..98b7e50 100644 --- a/util/number.ts +++ b/util/number.ts @@ -9,3 +9,15 @@ Number.prototype.toChar = function() { return String.fromCharCode(this as number); } +/** @summary get minimum of valid numbers, returns NaN when no valid values are entered */ +export function min(...values: Array) { + values.push(NaN); // make sure .reduce doesn't crash + return (values.filter(v => typeof v === "number") as Array)!.reduce((acc, v) => acc = v < acc ? v : acc); +} + +/** @summary get maximum of valid numbers, returns NaN when no valid values are entered */ +export function max(...values: Array) { + values.push(NaN); // make sure .reduce doesn't crash + return (values.filter(v => typeof v === "number") as Array)!.reduce((acc, v) => acc = v > acc ? v : acc); +} + -- cgit v1.2.3