about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-12 00:31:12 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-12 00:31:12 +0200
commit 413501fde6bac909f31ad399781626caa16c8d04 (patch)
tree 7514b0f551a7c41f618d58a50fa56972e6b06170
parent e99ae80f7adc0f0e677381c3cc1549235d3877ab (diff)
implement parser breaks
-rw-r--r-- api/sentence.ts 47
-rw-r--r-- core/raw/api.ts 5
-rw-r--r-- examples/reading-correction-break.ts 12
-rw-r--r-- examples/readme.md 4
-rw-r--r-- search/search.ts 13
-rw-r--r-- util/number.ts 12
6 files changed, 73 insertions, 20 deletions
diff --git a/api/sentence.ts b/api/sentence.ts
index 1d22be3..6b1a1e4 100644
--- a/api/sentence.ts
+++ b/api/sentence.ts
@@ -7,25 +7,24 @@ export default class Sentence extends APIBase {
public words: Array<Word> = [];
protected query?: SearchSentenceResult;
protected original: string = "";
+ protected breaks: Array<number> = [];
+ protected frozen = false;
public ready: Promise<void>;
private _resolveReady: () => void = () => {};
constructor(input: string) {
super();
- this.ready = new Promise(res => this._resolveReady = res);
- this.fetch(input);
+ this.original = input;
+ this.update();
}
first(searchValue: RegExp | string): Word | undefined {
- return this.words[0];
+ return this.words[0]; // TODO: implement
}
- private async fetch(input: string) {
- this.original = input;
- this.query = await (await this.api)["core"].search.sentence(input);
- await this.updateWords();
- this._resolveReady();
+ private async fetch() {
+ this.query = await (await this.api)["core"].search.sentence(this.original, { breaks: this.breaks });
}
private async updateWords() {
@@ -53,4 +52,36 @@ export default class Sentence extends APIBase {
return out + word.furigana(format);
}, "");
}
+
+ public async update() {
+ if (this.frozen) return;
+ // unresolve ready
+ this.ready = new Promise(res => this._resolveReady = res);
+
+ // fetch sentence from DB
+ await this.fetch();
+ // parse words into Word
+ await this.updateWords();
+
+ // mark ready again
+ this._resolveReady();
+ }
+
+ public at(term: string) {
+ return this.original.indexOf(term);
+ }
+
+ public async break(location: number) {
+ this.breaks.push(location);
+ await this.update();
+ }
+
+ public async freeze() {
+ this.frozen = true;
+ }
+
+ public async unfreeze() {
+ this.frozen = false;
+ await this.update();
+ }
}
diff --git a/core/raw/api.ts b/core/raw/api.ts
index 6046a26..76cdb5b 100644
--- a/core/raw/api.ts
+++ b/core/raw/api.ts
@@ -1,19 +1,22 @@
import Core, { CoreExport, CoreImport, CoreSearch, CoreUser } from "../api.ts";
import YomikunError from "../../util/error.ts";
import Search from "../../search/search.ts";
+import DB from "../../db/db.ts";
/** @summary internal Core (DO NOT USE DIRECTLY) */
export default class RawCore implements Core {
public ready: Promise<void>;
private _search: Search;
+ private _db: DB;
constructor() {
if (this.constructor === RawCore) {
throw new YomikunError("RawCore instantiated! Use DirectCoreClient instead!");
}
- this._search = new Search();
+ this._db = new DB();
+ this._search = new Search(this._db);
this.ready = new Promise(async resolve => {
await this._search.ready;
diff --git a/examples/reading-correction-break.ts b/examples/reading-correction-break.ts
index 6761165..a72e545 100644
--- a/examples/reading-correction-break.ts
+++ b/examples/reading-correction-break.ts
@@ -1,23 +1,23 @@
import Yomikun from "../api/yomikun.ts";
import DirectCoreClient from "../core/direct/client.ts";
-// WIP
-
// Create a direct (local) API instance
var api = new Yomikun(new DirectCoreClient());
// Explicitly wait until everything is ready (not required)
await api.ready;
-// index sentence (generates wrong readings)
-var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。");
+// index sentence (generates wrong reading for 「この辺に」)
+var sentence = await api.sentence("やっぱりこの辺にある武器も朽ちてるみたいだし");
// generated reading (wrong)
console.log(sentence.furigana());
-// insert parser break
-sentence.break(sentence.at("漢字"));
+// insert parser break in the middle of a (wrong) expression
+await sentence.break(sentence.at("この辺") + 2);
// generated reading (correct)
console.log(sentence.furigana());
+// TODO: this is a bad example, find an example that uses adjacent kanji that
+// can become a larger compound, but should be two separate words.
diff --git a/examples/readme.md b/examples/readme.md
index bc7fa9d..707a034 100644
--- a/examples/readme.md
+++ b/examples/readme.md
@@ -8,8 +8,8 @@ using the Yomikun API.~
Examples (checked = working):
- [ ] Lookup a word in a sentence
-- [ ] Get furigana in HTML for a sentence
-- [ ] Correct the reading of a word (because of ambiguous word boundries) by inserting a break
+- [x] Get furigana in HTML for a sentence
+- [x] Correct the reading of a word (because of ambiguous word boundries) by inserting a break
- [ ] Login as a regular user and ignore an expression
- [ ] Login as root and import a dictionary from a local file
- [ ] Series-specific search with a lot of jargon
diff --git a/search/search.ts b/search/search.ts
index 0a50773..57bd0e2 100644
--- a/search/search.ts
+++ b/search/search.ts
@@ -4,15 +4,18 @@ import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
import { DeepPartial } from "../util/types.ts";
+import { min } from "../util/number.ts";
/** @summary main Search class */
export default class Search {
db: DB;
ready: Promise<void>;
- constructor() {
- this.db = new DB();
+ /** @constant guaranteed lookahead delimiters */
+ private SCAN_DELIMITERS = [" ", " ", "。", "、", "「", "」"];
+ constructor(db: DB) {
+ this.db = db;
this.ready = new Promise<void>(async resolve => {
await this.db.ready;
resolve();
@@ -89,7 +92,11 @@ export default class Search {
};
for (let start = 0; start < sentence.length; start++) {
- var lookahead = props.lookahead; // TODO: stop at next delimiter (optimization)
+ var lookahead = min(
+ props.lookahead,
+ (props.breaks.filter(b => b > start)[0] - start) ?? props.lookahead, // lookahead as fallback instead of NaN
+ // sentence.first(this.SCAN_DELIMITERS, start),
+ );
var term = sentence.substring(start, start + lookahead);
var results = (await this.terms(term)).map(term => {
var word = term as SearchWord;
diff --git a/util/number.ts b/util/number.ts
index c28864f..98b7e50 100644
--- a/util/number.ts
+++ b/util/number.ts
@@ -9,3 +9,15 @@ Number.prototype.toChar = function() {
return String.fromCharCode(this as number);
}
+/** @summary get minimum of valid numbers, returns NaN when no valid values are entered */
+export function min(...values: Array<number | null | undefined>) {
+ values.push(NaN); // make sure .reduce doesn't crash
+ return (values.filter(v => typeof v === "number") as Array<number>)!.reduce((acc, v) => acc = v < acc ? v : acc);
+}
+
+/** @summary get maximum of valid numbers, returns NaN when no valid values are entered */
+export function max(...values: Array<number | null | undefined>) {
+ values.push(NaN); // make sure .reduce doesn't crash
+ return (values.filter(v => typeof v === "number") as Array<number>)!.reduce((acc, v) => acc = v > acc ? v : acc);
+}
+