about summary refs log tree commit diff
path: root/language/parser.ts
diff options
context:
space:
mode:
Diffstat (limited to 'language/parser.ts')
-rw-r--r-- language/parser.ts 144
1 files changed, 0 insertions, 144 deletions
diff --git a/language/parser.ts b/language/parser.ts
deleted file mode 100644
index 7fd3981..0000000
--- a/language/parser.ts
+++ /dev/null
@@ -1,144 +0,0 @@
-import { Tag, TagGroup } from "./tags.ts";
-import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts";
-import DB from "../db/db.ts";
-import "../util/array.ts";
-import "../util/set.ts";
-import { DeepPartial } from "../util/types.ts";
-
-// TODO: rename Parser to Search
-/** @summary Parser: owns a DB handle and turns an input sentence into a ParseResult of tokens */
-export default class Parser {
- db: DB; // database handle, created in the constructor
- ready: Promise<void>; // resolves after this.db.ready has been awaited
-
- constructor() {
- this.db = new DB();
- // NOTE(review): async executor - if db.ready ever rejects, resolve() is never reached and ready stays pending
- this.ready = new Promise<void>(async resolve => {
- await this.db.ready;
- resolve();
- });
- }
-
- // Public entry point; "Search.sentence()" is presumably its name after the planned Parser -> Search rename
- async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> {
- await this.ready; // wait for the DB before the first lookup
-
- // fill in a default for every option the caller left out (?? keeps explicit 0 / empty values)
- var props: InputSentenceProps = {
- lookahead: optional?.lookahead ?? 15, // max substring length handed to db.findTerm
- depth: optional?.depth ?? ParseDepth.Term, // how far parsing goes; Term stops after tokenizing
- priorityMod: {
- high: optional?.priorityMod?.high ?? 10, // added to result.sort to raise a candidate
- low: optional?.priorityMod?.low ?? -10, // added to result.sort to lower a candidate
- },
- breaks: optional?.breaks ?? [], // defaulted here but not read anywhere else in this class
- }
-
- let parseResult = await this.parseTerms(sentence, props);
- if (props.depth <= ParseDepth.Term) return parseResult;
-
- parseResult = await this.addGlossary(parseResult, props);
- if (props.depth <= ParseDepth.Term) return parseResult; // NOTE(review): repeats the check made before addGlossary, so it never fires; presumably meant a deeper ParseDepth - confirm
-
- return parseResult;
- }
-
- /** @summary scan sentence left to right: look up a substring at each start index, keep the best match as a token, then jump past it */
- private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> {
- var parseResult: ParseResult = {
- tokens: [],
- depth: ParseDepth.Term,
- input: sentence,
- };
-
- for (let start = 0; start < sentence.length; start++) {
- var lookahead = options.lookahead;
-
- var results = await this.db.findTerm(sentence.substring(start, start + lookahead)); // lookup on at most lookahead characters beginning at start
- // nothing found at this start index: move on one character (that character ends up in no token)
- if (results.length == 0) continue;
-
- results = results.filter(result => {
- // a negative sort value marks a term the user chose to ignore; drop it
- if (result.sort < 0) return false;
-
- // results with depth > 0 are deconjugated words
- if (result.depth > 0) {
- // a deconjugated match only makes sense for a word class that conjugates
- if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;
-
- // reject results whose word class lacks the matching Inflection.Reason tag (presumably deconjugated with another class's rule)
- if (result.tags.includes(Tag.Class.Verb.U) &&
- !result.tags.includes(Tag.Inflection.Reason.U)) return false;
- if (result.tags.includes(Tag.Class.Verb.Ru) &&
- !result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
- if (result.tags.includes(Tag.Class.Verb.Suru) &&
- !result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
- if (result.tags.includes(Tag.Class.Adjective.I) &&
- !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
- if (result.tags.includes(Tag.Class.Adjective.Na) &&
- !result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
- }
-
- // everything that reaches this point is kept
- return true;
- });
-
- // the filter removed every candidate: advance one character, same as the empty-lookup case
- if (results.length == 0) continue;
-
- // adjust result.sort in place (map hands back the same objects) to bias the ranking below
- results = results.map(result => {
- // whether the previous token carries any Tag.Name tag; undefined (falsy) when peek() yields nothing. NOTE(review): loop-invariant, yet recomputed for every result
- const lastTokenName = parseResult.tokens.peek()?.tags.anyOf(Object.values(Tag.Name));
-
- // suffixes: boost directly after a name, penalize everywhere else
- if (result.tags.includes(Tag.Class.Suffix))
- result.sort += lastTokenName ? options.priorityMod.high : options.priorityMod.low;
-
- // penalize terms that did not match on kanji (result.match.kanji falsy), unless
- // they are tagged as usually written in kana
- if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji)
- result.sort += options.priorityMod.low;
-
- return result;
- });
-
-
- results.sort((a, b) => {
- // longest matched source text first
- if (a.original.length != b.original.length) return b.original.length - a.original.length;
- // then by sort value (high to low)
- if (a.sort != b.sort) return b.sort - a.sort;
- // then by depth (high to low)
- if (a.depth != b.depth) return b.depth - a.depth;
- // full tie: report equal and leave relative order to the sort implementation (stable in ES2019+ engines, so not random there)
- return 0;
- });
-
- // best candidate; results is non-empty here because of the length check after the filter
- const result = results[0];
-
- parseResult.tokens.push({
- writing: result.expression,
- reading: result.reading,
- tags: result.tags,
- term_id: result.id,
- source: result.original,
- start: start,
- });
-
- start += result.original.length - 1; // -1 because loop already increments start
- continue; // extra verbose end of iteration
- }
- return parseResult;
- }
- /** @summary placeholder: hands input back unchanged; glossary lookup is not implemented yet */
- private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> {
- // TODO: annotate input with glossaries from DB
- options; // prevent unused warning
- return input;
- }
-};
-