1 files changed, 141 insertions, 0 deletions
diff --git a/search/search.ts b/search/search.ts
new file mode 100644
index 0000000..0a50773
--- /dev/null
+++ b/search/search.ts
@@ -0,0 +1,141 @@
+import { Tag, TagGroup } from "./tags.ts";
+import { SearchSentenceProps, SearchSentenceResult, SearchTermResult, SearchWord } from "./types.ts";
+import DB from "../db/db.ts";
+import "../util/array.ts";
+import "../util/set.ts";
+import { DeepPartial } from "../util/types.ts";
+
+/**
+ * @summary main Search class
+ * Owns a DB instance and offers `terms` (candidates at the start of a string)
+ * and `sentence` (splitting a whole string into words) on top of it.
+ */
+export default class Search {
+	db: DB;
+	/** settles once `db.ready` has settled; awaited by every public method */
+	ready: Promise<void>;
+
+	constructor() {
+		this.db = new DB();
+		// chained rather than wrapped in an async promise executor, so that a
+		// failing database rejects `ready` instead of leaving it pending forever
+		this.ready = Promise.resolve(this.db.ready).then(() => {});
+	}
+
+	/**
+	 * @summary find possible terms at start of string by deconjugating
+	 * @param term text handed to `db.findTerm`
+	 * @returns the database hits that pass filtering, in database order
+	 */
+	public async terms(term: string): Promise<Array<SearchTermResult>> {
+		await this.ready;
+
+		const found = await this.db.findTerm(term);
+
+		// filter invalid deconjugations/results
+		const valid = found.filter(result => {
+			// ignore ignored by user terms
+			if (result.sort < 0) return false;
+			// results that were not deconjugated need no further checks
+			if (!(result.depth > 0)) return true;
+			// check if this word can be conjugated at all
+			if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;
+			// ignore other wrong deconjugations: class tag without its matching reason tag
+			if (result.tags.includes(Tag.Class.Verb.U) &&
+				!result.tags.includes(Tag.Inflection.Reason.U)) return false;
+			if (result.tags.includes(Tag.Class.Verb.Ru) &&
+				!result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
+			if (result.tags.includes(Tag.Class.Verb.Suru) &&
+				!result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
+			if (result.tags.includes(Tag.Class.Adjective.I) &&
+				!result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
+			if (result.tags.includes(Tag.Class.Adjective.Na) &&
+				!result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
+			// all other results should be valid
+			return true;
+		});
+
+		return valid.map(result => ({
+			id: result.id,
+			writing: result.expression,
+			reading: result.reading,
+			tags: result.tags,
+			source: result.original,
+			sort: result.sort,
+			depth: result.depth,
+			match: {
+				reading: result.match.reading,
+				writing: result.match.writing,
+			},
+		}));
+	}
+
+	/**
+	 * @summary parse sentence into terms with readings
+	 * @param sentence text to split into words
+	 * @param optional overrides for the defaults (lookahead 15, priorityMod high 10 / low -10, no breaks)
+	 * @returns the input plus the chosen words; positions without any match are skipped
+	 */
+	public async sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult> {
+		await this.ready;
+
+		const props: SearchSentenceProps = {
+			lookahead: optional?.lookahead ?? 15,
+			priorityMod: {
+				high: optional?.priorityMod?.high ?? 10,
+				low: optional?.priorityMod?.low ?? -10,
+			},
+			breaks: optional?.breaks ?? [],
+		};
+
+		const parseResult: SearchSentenceResult = {
+			input: sentence,
+			words: [],
+		};
+
+		for (let start = 0; start < sentence.length; start++) {
+			const term = sentence.substring(start, start + props.lookahead); // TODO: stop at next delimiter (optimization)
+			const results = (await this.terms(term)).map(found => {
+				const word = found as SearchWord;
+				word.start = start;
+				return word;
+			});
+
+			// current starting point did not yield results, try again at next character or until end of input
+			if (results.length === 0) continue;
+
+			// true if last token was a name; same for every candidate here, so looked up once
+			const lastTokenName = parseResult.words.peek()?.tags.anyOf(Object.values(Tag.Name));
+
+			// bias search results by modifying sort value
+			for (const result of results) {
+				// give higher priority to suffixes when last token was a name, else lower priority
+				if (result.tags.includes(Tag.Class.Suffix))
+					result.sort += lastTokenName ? props.priorityMod.high : props.priorityMod.low;
+				// give lower priority to terms matched only by their readings, and are
+				// usually written in kanji
+				if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.writing)
+					result.sort += props.priorityMod.low;
+			}
+
+			results.sort((a, b) => {
+				// sort by original string length (long to short)
+				if (a.source.length !== b.source.length) return b.source.length - a.source.length;
+				// then by sort index (high to low)
+				if (a.sort !== b.sort) return b.sort - a.sort;
+				// then by depth (high to low)
+				if (a.depth !== b.depth) return b.depth - a.depth;
+				// else keep current order
+				return 0;
+			});
+
+			// pick top result
+			const best = results[0];
+			parseResult.words.push(best);
+			// -1 because loop already increments start; Math.max keeps the loop advancing if a source is ever empty
+			start += Math.max(best.source.length, 1) - 1;
+		}
+		return parseResult;
+	}
+}
+