about summary refs log tree commit diff
path: root/search/search.ts
diff options
context:
space:
mode:
Diffstat (limited to 'search/search.ts')
-rw-r--r-- search/search.ts 141
1 files changed, 141 insertions, 0 deletions
diff --git a/search/search.ts b/search/search.ts
new file mode 100644
index 0000000..0a50773
--- /dev/null
+++ b/search/search.ts
@@ -0,0 +1,141 @@
+import { Tag, TagGroup } from "./tags.ts";
+import { SearchSentenceProps, SearchSentenceResult, SearchTermResult, SearchWord } from "./types.ts";
+import DB from "../db/db.ts";
+import "../util/array.ts";
+import "../util/set.ts";
+import { DeepPartial } from "../util/types.ts";
+
+/**
+ * @summary main Search class
+ *
+ * Owns a `DB` instance and exposes term lookup (`terms`) and sentence
+ * parsing (`sentence`). Both public methods await `ready` before querying
+ * the database.
+ */
+export default class Search {
+  db: DB;
+  /** settles together with `db.ready`; rejects if the database fails to become ready */
+  ready: Promise<void>;
+
+  constructor() {
+    this.db = new DB();
+
+    // Chain directly off db.ready instead of wrapping it in
+    // `new Promise(async resolve => ...)`: with an async executor a rejection
+    // of db.ready is lost and `ready` would stay pending forever, hanging
+    // every caller that awaits it.
+    this.ready = Promise.resolve(this.db.ready).then(() => undefined);
+  }
+
+  /**
+   * @summary find possible terms at start of string by deconjugating
+   * @param term text that is passed to the database term lookup
+   * @returns lookup results with user-ignored terms and invalid
+   * deconjugations removed, mapped to `SearchTermResult`
+   */
+  public async terms(term: string): Promise<Array<SearchTermResult>> {
+    await this.ready;
+
+    const found = await this.db.findTerm(term);
+
+    // skip filtering valid results if there are none
+    if (found.length === 0) return [];
+
+    // filter invalid deconjugations/results
+    const valid = found.filter(result => {
+      // ignore ignored by user terms
+      if (result.sort < 0) return false;
+
+      // deconjugated words
+      if (result.depth > 0) {
+        // check if this word can be conjugated at all
+        if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;
+
+        // ignore other wrong deconjugations: a word class tag is only
+        // accepted together with the inflection reason tag of the same class
+        if (result.tags.includes(Tag.Class.Verb.U) &&
+          !result.tags.includes(Tag.Inflection.Reason.U)) return false;
+        if (result.tags.includes(Tag.Class.Verb.Ru) &&
+          !result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
+        if (result.tags.includes(Tag.Class.Verb.Suru) &&
+          !result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
+        if (result.tags.includes(Tag.Class.Adjective.I) &&
+          !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
+        if (result.tags.includes(Tag.Class.Adjective.Na) &&
+          !result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
+      }
+
+      // all other results should be valid
+      return true;
+    });
+
+    return valid.map(result => ({
+      id: result.id,
+      writing: result.expression,
+      reading: result.reading,
+      tags: result.tags,
+      source: result.original,
+      sort: result.sort,
+      depth: result.depth,
+      match: {
+        reading: result.match.reading,
+        writing: result.match.writing,
+      },
+    }));
+  }
+
+  /**
+   * @summary parse sentence into terms with readings
+   * @param sentence input text, scanned from left to right
+   * @param optional overrides for the defaults (`lookahead` 15,
+   * `priorityMod.high` 10, `priorityMod.low` -10, `breaks` []); note that
+   * `breaks` is stored in the props but not consulted by this method yet
+   * @returns the input together with the best-ranked word found at each
+   * position; characters that yield no lookup result are skipped
+   */
+  public async sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult> {
+    await this.ready;
+
+    const props: SearchSentenceProps = {
+      lookahead: optional?.lookahead ?? 15,
+      priorityMod: {
+        high: optional?.priorityMod?.high ?? 10,
+        low: optional?.priorityMod?.low ?? -10,
+      },
+      breaks: optional?.breaks ?? [],
+    };
+
+    const parseResult: SearchSentenceResult = {
+      input: sentence,
+      words: [],
+    };
+
+    for (let start = 0; start < sentence.length; start++) {
+      // TODO: stop at next delimiter (optimization)
+      const fragment = sentence.substring(start, start + props.lookahead);
+      const candidates = (await this.terms(fragment)).map(found => {
+        const word = found as SearchWord;
+        word.start = start;
+        return word;
+      });
+
+      // current starting point did not yield results, try again at next character or until end of input
+      if (candidates.length === 0) continue;
+
+      // true if last token was a name; identical for every candidate at this
+      // position, so it is computed once instead of once per candidate
+      const lastTokenName = parseResult.words.peek()?.tags.anyOf(Object.values(Tag.Name));
+
+      // bias search results by modifying sort value
+      for (const candidate of candidates) {
+        // give higher priority to suffixes when last token was a name, else lower priority
+        if (candidate.tags.includes(Tag.Class.Suffix))
+          candidate.sort += lastTokenName ? props.priorityMod.high : props.priorityMod.low;
+
+        // give lower priority to terms matched only by their readings, and are
+        // usually written in kanji
+        if (!candidate.tags.includes(Tag.Auxiliary.UsuallyKana) && !candidate.match.writing)
+          candidate.sort += props.priorityMod.low;
+      }
+
+      candidates.sort((a, b) => {
+        // sort by original string length (long to short)
+        if (a.source.length !== b.source.length) return b.source.length - a.source.length;
+        // then by sort index (high to low)
+        if (a.sort !== b.sort) return b.sort - a.sort;
+        // then by depth (high to low)
+        if (a.depth !== b.depth) return b.depth - a.depth;
+        // else keep current order
+        return 0;
+      });
+
+      // pick top result
+      const best = candidates[0];
+      parseResult.words.push(best);
+
+      // skip past the consumed text; -1 because the loop already increments
+      // start. Always advance by at least one character so that a result
+      // with an empty source can never stall the loop.
+      start += Math.max(best.source.length, 1) - 1;
+    }
+
+    return parseResult;
+  }
+}
+