diff options
Diffstat (limited to 'search/search.ts')
-rw-r--r-- | search/search.ts | 141 |
1 files changed, 141 insertions, 0 deletions
import { Tag, TagGroup } from "./tags.ts";
import { SearchSentenceProps, SearchSentenceResult, SearchTermResult, SearchWord } from "./types.ts";
import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
import { DeepPartial } from "../util/types.ts";

/**
 * @summary main Search class
 *
 * Owns a `DB` instance and exposes two lookups on top of it: `terms` (all
 * valid dictionary hits for a string) and `sentence` (greedy left-to-right
 * segmentation of a string into words). Both wait for `ready` before touching
 * the database.
 */
export default class Search {
	db: DB;
	/** settles together with `db.ready`; rejects if the database fails to get ready */
	ready: Promise<void>;

	constructor() {
		this.db = new DB();

		// An async function is used instead of `new Promise(async resolve => ...)`:
		// with an async executor a rejection of `db.ready` would be lost and
		// `ready` would stay pending forever, hanging every caller that awaits it.
		this.ready = (async () => {
			await this.db.ready;
		})();
	}

	/**
	 * @summary find possible terms at start of string by deconjugating
	 *
	 * @param term  text to look up via `db.findTerm`
	 * @returns the database hits that pass validation, mapped to `SearchTermResult`
	 */
	public async terms(term: string): Promise<Array<SearchTermResult>> {
		await this.ready;

		const candidates = await this.db.findTerm(term);

		// skip filtering valid results if there are none
		if (candidates.length === 0) return [];

		// filter invalid deconjugations/results
		const valid = candidates.filter(result => {
			// ignore ignored by user terms
			if (result.sort < 0) return false;

			// deconjugated words
			if (result.depth > 0) {
				// check if this word can be conjugated at all
				if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;

				// ignore other wrong deconjugations: a word of a given class is
				// only accepted when it also carries the matching inflection
				// reason tag
				if (result.tags.includes(Tag.Class.Verb.U) &&
					!result.tags.includes(Tag.Inflection.Reason.U)) return false;
				if (result.tags.includes(Tag.Class.Verb.Ru) &&
					!result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
				if (result.tags.includes(Tag.Class.Verb.Suru) &&
					!result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
				if (result.tags.includes(Tag.Class.Adjective.I) &&
					!result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
				if (result.tags.includes(Tag.Class.Adjective.Na) &&
					!result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
			}

			// all other results should be valid
			return true;
		});

		return valid.map(result => ({
			id: result.id,
			writing: result.expression,
			reading: result.reading,
			tags: result.tags,
			source: result.original,
			sort: result.sort,
			depth: result.depth,
			match: {
				reading: result.match.reading,
				writing: result.match.writing,
			},
		}));
	}

	/**
	 * @summary parse sentence into terms with readings
	 *
	 * Walks the input from left to right. At every position the next
	 * `lookahead` characters are passed to `terms`; the best-ranked hit is
	 * appended to the output and the cursor jumps past the text it consumed.
	 * Positions without any hit are skipped one character at a time.
	 *
	 * @param sentence  input text to split into words
	 * @param optional  overrides for the defaults (`lookahead` 15,
	 *                  `priorityMod.high` 10, `priorityMod.low` -10, `breaks` [])
	 * @returns the original input together with the list of chosen words
	 */
	public async sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult> {
		await this.ready;

		const props: SearchSentenceProps = {
			lookahead: optional?.lookahead ?? 15,
			priorityMod: {
				high: optional?.priorityMod?.high ?? 10,
				low: optional?.priorityMod?.low ?? -10,
			},
			breaks: optional?.breaks ?? [],
		};

		const parseResult: SearchSentenceResult = {
			input: sentence,
			words: [],
		};

		for (let start = 0; start < sentence.length; start++) {
			const lookahead = props.lookahead; // TODO: stop at next delimiter (optimization)
			const term = sentence.substring(start, start + lookahead);
			const results = (await this.terms(term)).map(found => {
				const word = found as SearchWord;
				word.start = start;
				return word;
			});

			// current starting point did not yield results, try again at next character or until end of input
			if (results.length === 0) continue;

			// true if last token was a name else false; it is the same for every
			// candidate at this position, so it is evaluated once
			const lastTokenName = parseResult.words.peek()?.tags.anyOf(Object.values(Tag.Name));

			// bias search results by modifying sort value
			for (const result of results) {
				// give higher priority to suffixes when last token was a name, else lower priority
				if (result.tags.includes(Tag.Class.Suffix))
					result.sort += lastTokenName ? props.priorityMod.high : props.priorityMod.low;

				// give lower priority to terms matched only by their readings, and are
				// usually written in kanji
				if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.writing)
					result.sort += props.priorityMod.low;
			}

			results.sort((a, b) => {
				// sort by original string length (long to short)
				if (a.source.length !== b.source.length) return b.source.length - a.source.length;
				// then by sort index (high to low)
				if (a.sort !== b.sort) return b.sort - a.sort;
				// then by depth (high to low)
				if (a.depth !== b.depth) return b.depth - a.depth;
				// else keep current order
				return 0;
			});

			// pick top result
			const best = results[0];
			parseResult.words.push(best);

			// Skip the consumed text; -1 because the loop already increments start.
			// The consumed length is clamped to at least one character so a match
			// with an empty source cannot move the cursor backwards and loop forever.
			start += Math.max(best.source.length, 1) - 1;
		}

		return parseResult;
	}
}