diff options
Diffstat (limited to 'language/parser.ts')
-rw-r--r-- | language/parser.ts | 44 |
1 files changed, 30 insertions, 14 deletions
diff --git a/language/parser.ts b/language/parser.ts index 27aa5ee..bc80637 100644 --- a/language/parser.ts +++ b/language/parser.ts @@ -3,7 +3,9 @@ import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts"; import DB from "../db/db.ts"; import "../util/array.ts"; import "../util/set.ts"; +import { DeepPartial } from "../util/types.ts"; +// TODO: rename Parser to Search /** @summary main Parser class */ export default class Parser { db: DB; @@ -18,28 +20,41 @@ export default class Parser { }); } - async parse(sentence: string, options?: InputSentenceProps): Promise<ParseResult> { + async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> { await this.ready; - let parseResult = await this.parseTerms(sentence, options); - if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult; - parseResult = await this.addGlossary(parseResult, options); - if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult; + + // initialize default options + var props: InputSentenceProps = { + lookahead: optional?.lookahead ?? 15, + depth: optional?.depth ?? ParseDepth.Term, + priorityMod: { + high: optional?.priorityMod?.high ?? 10, + low: optional?.priorityMod?.low ?? 0.1, + }, + breaks: optional?.breaks ?? [], + } + + let parseResult = await this.parseTerms(sentence, props); + if (props.depth <= ParseDepth.Term) return parseResult; + + parseResult = await this.addGlossary(parseResult, props); + if (props.depth <= ParseDepth.Term) return parseResult; + return parseResult; } /** @summary parse sentence into terms with readings */ - private async parseTerms(sentence: string, options?: InputSentenceProps): Promise<ParseResult> { - const MAX_LOOKAHEAD = options?.lookahead ?? 15; - const PRIORITY_MOD_HIGHER = options?.priorityMod?.high ?? 10; - const PRIORITY_MOD_LOWER = options?.priorityMod?.low ?? 0.1; - + private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> { var parseResult: ParseResult = { tokens: [], depth: ParseDepth.Term, + input: sentence, }; for (let start = 0; start < sentence.length; start++) { - var results = await this.db.findTerm(sentence.substring(start, start + MAX_LOOKAHEAD)); + var lookahead = options.lookahead; + + var results = await this.db.findTerm(sentence.substring(start, start + lookahead)); // current starting point did not yield results, try again at next character or until end of input if (results.length == 0) continue; @@ -75,12 +90,12 @@ export default class Parser { // give higher priority to suffixes when last token was a name, else lower priority if (result.tags.includes(Tag.Class.Suffix)) - result.sort *= lastTokenName ? PRIORITY_MOD_HIGHER : PRIORITY_MOD_LOWER; + result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low; // give lower priority to terms matched only by their readings, and are // usually written in kanji if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji) - result.sort *= PRIORITY_MOD_LOWER; + result.sort *= options.priorityMod.low; return result; }); @@ -105,6 +120,7 @@ export default class Parser { tags: result.tags, term_id: result.id, source: result.original, + start: start, }); start += result.original.length - 1; // -1 because loop already increments start @@ -113,7 +129,7 @@ export default class Parser { return parseResult; } - private async addGlossary(input: ParseResult, options?: InputSentenceProps): Promise<ParseResult> { + private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> { // TODO: annotate input with glossaries from DB options; // prevent unused warning return input; |