From 413501fde6bac909f31ad399781626caa16c8d04 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Wed, 12 Jul 2023 00:31:12 +0200 Subject: implement parser breaks --- search/search.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'search') diff --git a/search/search.ts b/search/search.ts index 0a50773..57bd0e2 100644 --- a/search/search.ts +++ b/search/search.ts @@ -4,15 +4,18 @@ import DB from "../db/db.ts"; import "../util/array.ts"; import "../util/set.ts"; import { DeepPartial } from "../util/types.ts"; +import { min } from "../util/number.ts"; /** @summary main Search class */ export default class Search { db: DB; ready: Promise; - constructor() { - this.db = new DB(); + /** @constant guaranteed lookahead delimiters */ + private SCAN_DELIMITERS = [" ", " ", "。", "、", "「", "」"]; + constructor(db: DB) { + this.db = db; this.ready = new Promise(async resolve => { await this.db.ready; resolve(); @@ -89,7 +92,11 @@ export default class Search { }; for (let start = 0; start < sentence.length; start++) { - var lookahead = props.lookahead; // TODO: stop at next delimiter (optimization) + var lookahead = min( + props.lookahead, + (props.breaks.filter(b => b > start)[0] - start) ?? props.lookahead, // lookahead as fallback instead of NaN + // sentence.first(this.SCAN_DELIMITERS, start), + ); var term = sentence.substring(start, start + lookahead); var results = (await this.terms(term)).map(term => { var word = term as SearchWord; -- cgit v1.2.3