aboutsummaryrefslogtreecommitdiff
path: root/language/parser.ts
diff options
context:
space:
mode:
Diffstat (limited to 'language/parser.ts')
-rw-r--r--language/parser.ts44
1 files changed, 30 insertions, 14 deletions
diff --git a/language/parser.ts b/language/parser.ts
index 27aa5ee..bc80637 100644
--- a/language/parser.ts
+++ b/language/parser.ts
@@ -3,7 +3,9 @@ import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts";
import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
+import { DeepPartial } from "../util/types.ts";
+// TODO: rename Parser to Search
/** @summary main Parser class */
export default class Parser {
db: DB;
@@ -18,28 +20,41 @@ export default class Parser {
});
}
- async parse(sentence: string, options?: InputSentenceProps): Promise<ParseResult> {
+ async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> {
await this.ready;
- let parseResult = await this.parseTerms(sentence, options);
- if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult;
- parseResult = await this.addGlossary(parseResult, options);
- if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult;
+
+ // initialize default options
+ var props: InputSentenceProps = {
+ lookahead: optional?.lookahead ?? 15,
+ depth: optional?.depth ?? ParseDepth.Term,
+ priorityMod: {
+ high: optional?.priorityMod?.high ?? 10,
+ low: optional?.priorityMod?.low ?? 0.1,
+ },
+ breaks: optional?.breaks ?? [],
+ }
+
+ let parseResult = await this.parseTerms(sentence, props);
+ if (props.depth <= ParseDepth.Term) return parseResult;
+
+ parseResult = await this.addGlossary(parseResult, props);
+ if (props.depth <= ParseDepth.Term) return parseResult;
+
return parseResult;
}
/** @summary parse sentence into terms with readings */
- private async parseTerms(sentence: string, options?: InputSentenceProps): Promise<ParseResult> {
- const MAX_LOOKAHEAD = options?.lookahead ?? 15;
- const PRIORITY_MOD_HIGHER = options?.priorityMod?.high ?? 10;
- const PRIORITY_MOD_LOWER = options?.priorityMod?.low ?? 0.1;
-
+ private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> {
var parseResult: ParseResult = {
tokens: [],
depth: ParseDepth.Term,
+ input: sentence,
};
for (let start = 0; start < sentence.length; start++) {
- var results = await this.db.findTerm(sentence.substring(start, start + MAX_LOOKAHEAD));
+ var lookahead = options.lookahead;
+
+ var results = await this.db.findTerm(sentence.substring(start, start + lookahead));
// current starting point did not yield results; retry from the next character until the end of input is reached
if (results.length == 0) continue;
@@ -75,12 +90,12 @@ export default class Parser {
// give higher priority to suffixes when last token was a name, else lower priority
if (result.tags.includes(Tag.Class.Suffix))
- result.sort *= lastTokenName ? PRIORITY_MOD_HIGHER : PRIORITY_MOD_LOWER;
+ result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low;
// give lower priority to terms that were matched only by their reading even
// though they are usually written in kanji
if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji)
- result.sort *= PRIORITY_MOD_LOWER;
+ result.sort *= options.priorityMod.low;
return result;
});
@@ -105,6 +120,7 @@ export default class Parser {
tags: result.tags,
term_id: result.id,
source: result.original,
+ start: start,
});
start += result.original.length - 1; // -1 because loop already increments start
@@ -113,7 +129,7 @@ export default class Parser {
return parseResult;
}
- private async addGlossary(input: ParseResult, options?: InputSentenceProps): Promise<ParseResult> {
+ private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> {
// TODO: annotate input with glossaries from DB
options; // prevent unused warning
return input;