diff options
Diffstat (limited to 'language/parser.ts')
-rw-r--r-- | language/parser.ts | 144 |
1 files changed, 0 insertions, 144 deletions
import { Tag, TagGroup } from "./tags.ts";
import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts";
import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
import { DeepPartial } from "../util/types.ts";

// TODO: rename Parser to Search
/**
 * @summary main Parser class
 *
 * Splits an input sentence into tokens by looking up substrings of the
 * sentence in the database and keeping the best-ranked match at each position.
 */
export default class Parser {
  db: DB;
  /** Settles together with `db.ready`; awaited at the start of every `parse()` call. */
  ready: Promise<void>;

  constructor() {
    this.db = new DB();

    // Await the database directly instead of wrapping it in
    // `new Promise(async resolve => ...)`: with an async executor a rejection
    // of `db.ready` is lost and `ready` would stay pending forever.
    this.ready = (async () => {
      await this.db.ready;
    })();
  }

  // Search.sentence()
  /**
   * Parse a sentence up to the requested depth.
   *
   * @param sentence input text to parse
   * @param optional partial options; missing fields default to
   *   `lookahead: 15`, `depth: ParseDepth.Term`,
   *   `priorityMod: { high: 10, low: -10 }` and `breaks: []`
   * @returns the parse result for `sentence`
   */
  async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> {
    await this.ready;

    // initialize default options
    const props: InputSentenceProps = {
      lookahead: optional?.lookahead ?? 15,
      depth: optional?.depth ?? ParseDepth.Term,
      priorityMod: {
        high: optional?.priorityMod?.high ?? 10,
        low: optional?.priorityMod?.low ?? -10,
      },
      breaks: optional?.breaks ?? [],
    };

    const parseResult = await this.parseTerms(sentence, props);
    if (props.depth <= ParseDepth.Term) return parseResult;

    // Deeper than Term: annotate with glossaries. (A second, identical depth
    // check used to follow this call; it could never be true and was removed.)
    return this.addGlossary(parseResult, props);
  }

  /**
   * @summary parse sentence into terms with readings
   *
   * Walks the sentence from left to right. At each position the next
   * `options.lookahead` characters are looked up with `db.findTerm`, the
   * candidates are filtered and re-ranked, and the best one becomes a token.
   * Positions that yield no valid candidate are skipped one character at a time.
   *
   * @param sentence input text to tokenize
   * @param options fully populated options (`lookahead` and `priorityMod` are read here)
   * @returns a result with `depth` set to `ParseDepth.Term` and the collected tokens
   */
  private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> {
    const parseResult: ParseResult = {
      tokens: [],
      depth: ParseDepth.Term,
      input: sentence,
    };

    for (let start = 0; start < sentence.length; start++) {
      const found = await this.db.findTerm(sentence.substring(start, start + options.lookahead));

      const results = found.filter(result => {
        // ignore ignored by user terms
        if (result.sort < 0) return false;

        // deconjugated words
        if (result.depth > 0) {
          // check if this word can be conjugated at all
          if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;

          // ignore other wrong deconjugations: the inflection reason has to
          // match the word class it was applied to
          if (result.tags.includes(Tag.Class.Verb.U) &&
            !result.tags.includes(Tag.Inflection.Reason.U)) return false;
          if (result.tags.includes(Tag.Class.Verb.Ru) &&
            !result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
          if (result.tags.includes(Tag.Class.Verb.Suru) &&
            !result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
          if (result.tags.includes(Tag.Class.Adjective.I) &&
            !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
          if (result.tags.includes(Tag.Class.Adjective.Na) &&
            !result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
        }

        // all other results should be valid grammatically
        return true;
      });

      // nothing found, or no valid results left after filter: try again at the
      // next character or until end of input
      if (results.length === 0) continue;

      // true if last token was a name; the same for every candidate at this
      // position, so it is computed once
      const lastTokenName = parseResult.tokens.peek()?.tags.anyOf(Object.values(Tag.Name));

      // bias search results by modifying sort value
      // NOTE(review): this adjusts `sort` on the objects returned by findTerm;
      // confirm the DB layer does not hand out cached/shared objects.
      for (const result of results) {
        // give higher priority to suffixes when last token was a name, else lower priority
        if (result.tags.includes(Tag.Class.Suffix))
          result.sort += lastTokenName ? options.priorityMod.high : options.priorityMod.low;

        // give lower priority to terms matched only by their readings, and are
        // usually written in kanji
        if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji)
          result.sort += options.priorityMod.low;
      }

      results.sort((a, b) => {
        // sort by original string length (long to short)
        if (a.original.length !== b.original.length) return b.original.length - a.original.length;
        // then by sort index (high to low)
        if (a.sort !== b.sort) return b.sort - a.sort;
        // then by depth (high to low)
        if (a.depth !== b.depth) return b.depth - a.depth;
        // else keep the order the candidates already had (sort is stable)
        return 0;
      });

      // pick top result
      const best = results[0];

      parseResult.tokens.push({
        writing: best.expression,
        reading: best.reading,
        tags: best.tags,
        term_id: best.id,
        source: best.original,
        start: start,
      });

      // Skip past the matched text; -1 because the loop already increments
      // start. Clamped at 0 so a zero-length match cannot cancel the increment
      // and stall the loop.
      start += Math.max(best.original.length - 1, 0);
    }

    return parseResult;
  }

  /**
   * Glossary annotation step; currently a placeholder that returns `input` unchanged.
   *
   * @param input parse result to annotate
   * @param options parse options (not used yet)
   * @returns the same `input` object
   */
  private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> {
    // TODO: annotate input with glossaries from DB
    void options; // prevent unused warning
    return input;
  }
}