import { Tag, TagGroup } from "./tags.ts";
import { SearchSentenceProps, SearchSentenceResult, SearchTermResult, SearchWord } from "./types.ts";
import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
import { DeepPartial } from "../util/types.ts";
import { min } from "../util/number.ts";

/**
 * @summary main Search class
 *
 * Wraps a {@link DB} and offers two lookups: {@link Search.terms}, which finds
 * dictionary terms at the start of a string, and {@link Search.sentence}, which
 * greedily splits a whole sentence into such terms.
 */
export default class Search {
  db: DB;

  /** Settles once `db.ready` has settled; rejects if `db.ready` rejects. */
  ready: Promise<void>;

  /**
   * @constant guaranteed lookahead delimiters
   *
   * Currently unused: the delimiter-based lookahead limit in `sentence()` is
   * disabled.
   * NOTE(review): the first two entries are byte-identical here; one of them
   * may have been a full-width space originally - confirm against history.
   */
  private SCAN_DELIMITERS = [" ", " ", "。", "、", "「", "」"];

  /**
   * @param db database used for all term lookups
   */
  constructor(db: DB) {
    this.db = db;
    // An async function (rather than `new Promise(async resolve => ...)`) makes
    // a failing `db.ready` reject `ready` instead of leaving it pending forever.
    this.ready = (async () => {
      await this.db.ready;
    })();
  }

  /**
   * @summary find possible terms at start of string by deconjugating
   *
   * @param term text whose beginning should be matched against the database
   * @returns the results of `db.findTerm(term)` with user-ignored entries and
   *   inconsistent deconjugations removed, in the order the database returned
   *   them; empty if nothing matched
   */
  public async terms(term: string): Promise<Array<SearchTermResult>> {
    await this.ready;

    const candidates = await this.db.findTerm(term);

    // skip filtering valid results if there are none
    if (candidates.length === 0) return [];

    // filter invalid deconjugations/results
    const valid = candidates.filter(result => {
      // ignore ignored by user terms
      if (result.sort < 0) return false;

      // non-deconjugated matches need no further checks
      if (result.depth <= 0) return true;

      // check if this word can be conjugated at all
      if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;

      // the word class must agree with the inflection rule that produced the
      // match; ignore other wrong deconjugations
      if (result.tags.includes(Tag.Class.Verb.U)
        && !result.tags.includes(Tag.Inflection.Reason.U)) return false;
      if (result.tags.includes(Tag.Class.Verb.Ru)
        && !result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
      if (result.tags.includes(Tag.Class.Verb.Suru)
        && !result.tags.anyOf([Tag.Inflection.Reason.Suru, Tag.Inflection.Suru])) return false;
      if (result.tags.includes(Tag.Class.Adjective.I)
        && !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
      if (result.tags.includes(Tag.Class.Adjective.Na)
        && !result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;

      // all other results should be valid
      return true;
    });

    return valid.map(result => ({
      id: result.id,
      writing: result.expression,
      reading: result.reading,
      tags: result.tags,
      source: result.original,
      sort: result.sort,
      depth: result.depth,
      match: {
        reading: result.match.reading,
        writing: result.match.writing,
      },
    }));
  }

  /**
   * @summary parse sentence into terms with readings
   *
   * Scans the input left to right. At each position a window of at most
   * `lookahead` characters (cut short at the next entry of `breaks`) is looked
   * up via {@link Search.terms}; the best match is appended to the result and
   * the scan continues after it. Positions without any match are skipped.
   *
   * @param sentence text to split
   * @param optional overrides for the defaults `lookahead: 15`,
   *   `priorityMod: { high: 10, low: -10 }` and `breaks: []`
   * @returns the input together with the chosen words, each annotated with its
   *   `start` index in the input
   */
  public async sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult> {
    await this.ready;

    const props: SearchSentenceProps = {
      lookahead: optional?.lookahead ?? 15,
      priorityMod: {
        high: optional?.priorityMod?.high ?? 10,
        low: optional?.priorityMod?.low ?? -10,
      },
      breaks: optional?.breaks ?? [],
    };

    const parseResult: SearchSentenceResult = {
      input: sentence,
      words: [],
    };

    for (let start = 0; start < sentence.length; start++) {
      // Limit the window to the first listed break after `start`, if any.
      // The check must be on the break itself: `undefined - start` is NaN,
      // which `??` does not treat as missing.
      const nextBreak = props.breaks.find(b => b > start);
      const lookahead = nextBreak === undefined
        ? props.lookahead
        : min(props.lookahead, nextBreak - start);
      // (disabled) additional limit: sentence.first(this.SCAN_DELIMITERS, start)

      const term = sentence.substring(start, start + lookahead);
      const results = (await this.terms(term)).map(found => {
        const word = found as SearchWord;
        word.start = start;
        return word;
      });

      // current starting point did not yield results, try again at next
      // character or until end of input
      if (results.length === 0) continue;

      // true if last token was a name, else false/undefined; identical for
      // every candidate of this iteration, so computed once
      const lastTokenName = parseResult.words.peek()?.tags.anyOf(Object.values(Tag.Name));

      // bias search results by modifying sort value
      for (const result of results) {
        // give higher priority to suffixes when last token was a name, else
        // lower priority
        if (result.tags.includes(Tag.Class.Suffix))
          result.sort += lastTokenName ? props.priorityMod.high : props.priorityMod.low;

        // give lower priority to terms matched only by their readings, and are
        // usually written in kanji
        if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.writing)
          result.sort += props.priorityMod.low;
      }

      results.sort((a, b) => {
        // sort by original string length (long to short)
        if (a.source.length !== b.source.length) return b.source.length - a.source.length;
        // then by sort index (high to low)
        if (a.sort !== b.sort) return b.sort - a.sort;
        // then by depth (high to low)
        if (a.depth !== b.depth) return b.depth - a.depth;
        // else keep current order (the sort is stable)
        return 0;
      });

      // pick top result
      const best = results[0];
      parseResult.words.push(best);

      // -1 because the loop already increments start; clamp to one character
      // so an empty match can never stall the scan
      start += Math.max(best.source.length, 1) - 1;
    }

    return parseResult;
  }
}