diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-01 16:37:50 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-01 16:37:50 +0200 |
commit | ce9e0788317b25e5d297ed38d9fed0754a341288 (patch) | |
tree | 29563a39c73ded16cd93eb7b5c5664d1ece944ac | |
parent | 8ff39cbe6300ca479584fe7d85ff03a1f65bc9b0 (diff) |
WIP sentence API
-rw-r--r-- | api/sentence-word.ts | 3 | ||||
-rw-r--r-- | api/sentence.ts | 38 | ||||
-rw-r--r-- | api/word.ts | 1 | ||||
-rw-r--r-- | api/yomikun.ts | 5 | ||||
-rw-r--r-- | core/api.ts | 5 | ||||
-rw-r--r-- | core/http/client.ts | 8 | ||||
-rw-r--r-- | core/http/server.ts | 3 | ||||
-rw-r--r-- | core/http/types.ts | 4 | ||||
-rw-r--r-- | core/raw/api.ts | 14 | ||||
-rw-r--r-- | examples/sentence-word-lookup.ts | 1 | ||||
-rw-r--r-- | language/parser.ts | 44 | ||||
-rw-r--r-- | language/types.ts | 16 | ||||
-rw-r--r-- | util/array.ts | 6 | ||||
-rw-r--r-- | util/types.ts | 8 |
14 files changed, 118 insertions, 38 deletions
diff --git a/api/sentence-word.ts b/api/sentence-word.ts index a5bb9ca..6b2ef22 100644 --- a/api/sentence-word.ts +++ b/api/sentence-word.ts @@ -1,10 +1,11 @@ +import { ParseToken } from "../language/types.ts"; import Word from "./word.ts"; export default class SentenceWord extends Word { public indexStart: number = 0; public indexEnd: number = 0; - constructor() { + constructor(source: string | ParseToken) { super(); } } diff --git a/api/sentence.ts b/api/sentence.ts index 6f67844..276a6c5 100644 --- a/api/sentence.ts +++ b/api/sentence.ts @@ -1,16 +1,50 @@ +import { ParseResult } from "../language/types.ts"; import APIBase from "./base.ts"; import SentenceWord from "./sentence-word.ts"; import Word from "./word.ts"; -import Yomikun from "./yomikun.ts"; export default class Sentence extends APIBase { public words: Array<SentenceWord> = []; + protected query?: ParseResult; + protected original: string = ""; - constructor() { + public ready: Promise<void>; + private _resolveReady: () => void = () => {}; + + constructor(input: string) { super(); + this.ready = new Promise(res => this._resolveReady = res); + this.fetch(input); } first(searchValue: RegExp | string): Word | undefined { return this.words[0]; } + + private async fetch(input: string) { + this.original = input; + this.query = await (await this.api)["core"].parseSentence(input); + await this.updateWords(); + this._resolveReady(); + } + + private async updateWords() { + this.words.clear(); + let token = 0; + let i = 0; + while (i < this.original.length) { + this.words.push(new SentenceWord(this.query!.tokens[token]).withParent(await this.api)); + + i += this.query!.tokens[token].source.length; + if (i == this.original.length) break; + token++; + + // continue if there are no unrecognized gaps between tokens + if (this.query!.tokens[token]?.start == i) continue; + var remainder = this.original.substring(i, this.query!.tokens[token]?.start); + + this.words.push(new SentenceWord(remainder).withParent(await this.api)); + i += remainder.length; + } + } } diff --git a/api/word.ts b/api/word.ts index 3e8fce6..63dce10 100644 --- a/api/word.ts +++ b/api/word.ts @@ -1,5 +1,6 @@ import Glossary from "./glossary.ts"; import APIBase from "./base.ts"; +import { ParseToken } from "../language/types.ts"; export default class Word extends APIBase { public writing = "TODO"; diff --git a/api/yomikun.ts b/api/yomikun.ts index 4971567..60d49ba 100644 --- a/api/yomikun.ts +++ b/api/yomikun.ts @@ -1,5 +1,6 @@ import Core from "../core/api.ts"; import RemoteCoreClient from "../core/http/client.ts"; +import { ParseResult } from "../language/types.ts"; import Sentence from "./sentence.ts"; export default class Yomikun { @@ -10,7 +11,9 @@ export default class Yomikun { } async sentence(input: string): Promise<Sentence> { - return new Sentence().withParent(this); + var sentence = new Sentence(input).withParent(this); + await sentence.ready; + return sentence; } } diff --git a/core/api.ts b/core/api.ts index 51f976a..0720c8b 100644 --- a/core/api.ts +++ b/core/api.ts @@ -1,4 +1,5 @@ -import { ParseResult } from "../language/types.ts"; +import { InputSentenceProps, ParseResult } from "../language/types.ts"; +import { DeepPartial } from "../util/types.ts"; /** * @summary Core interface @@ -12,6 +13,6 @@ export default abstract class Core { abstract ready: Promise<void>; /** @summary parse sentence */ - abstract parseSentence(input: string): Promise<ParseResult>; + abstract parseSentence(input: string, options?: DeepPartial<InputSentenceProps>): Promise<ParseResult>; }; diff --git a/core/http/client.ts b/core/http/client.ts index 118e8f5..6b4e1a3 100644 --- a/core/http/client.ts +++ b/core/http/client.ts @@ -1,4 +1,6 @@ +import { InputSentenceProps } from "../../language/types.ts"; import "../../util/array.ts"; +import { DeepPartial } from "../../util/types.ts"; import Core from "../api.ts"; import { ConnectionProps, ConnectionPropsDefault } from "./props.ts"; @@ -30,12 +32,10 @@ export default class RemoteCoreClient implements Core { return response.json(); } - async parseSentence(input: string) { + async parseSentence(input: string, options?: DeepPartial<InputSentenceProps>) { var request: CoreRequestParseSentence = { command: "parseSentence", - options: { - input: input, - }, + options: { input, options, }, }; var { response } = await this.request(request) as CoreResponseParseSentence; return response; diff --git a/core/http/server.ts b/core/http/server.ts index 0a9a082..7781a22 100644 --- a/core/http/server.ts +++ b/core/http/server.ts @@ -13,10 +13,11 @@ export default class RemoteCoreServer extends RawCore { parseSentence: async _req => { var req = _req as CoreRequestParseSentence; var input = req.options?.input + var options = req.options?.options; if (!input) return new Response("", { status: 404 }); return new Response(JSON.stringify({ command: "parseSentence", - response: await this.parseSentence(input), + response: await this.parseSentence(input, options), } as CoreResponseParseSentence)); }, }; diff --git a/core/http/types.ts b/core/http/types.ts index af2cfea..3d55a98 100644 --- a/core/http/types.ts +++ b/core/http/types.ts @@ -1,4 +1,5 @@ -import { ParseResult } from "../../language/types.ts"; +import { InputSentenceProps, ParseResult } from "../../language/types.ts"; +import { DeepPartial } from "../../util/types.ts"; export interface CoreRequest { command: string; @@ -9,6 +10,7 @@ export interface CoreRequestParseSentence extends CoreRequest { command: "parseSentence"; options: { input: string; + options?: DeepPartial<InputSentenceProps>; }; }; diff --git a/core/raw/api.ts b/core/raw/api.ts index f47dead..593b932 100644 --- a/core/raw/api.ts +++ b/core/raw/api.ts @@ -1,27 +1,29 @@ import Core from "../api.ts"; import Parser from "../../language/parser.ts"; import YomikunError from "../../util/error.ts"; +import { DeepPartial } from "../../util/types.ts"; +import { InputSentenceProps } from "../../language/types.ts"; /** @summary internal Core (DO NOT USE DIRECTLY) */ export default class RawCore implements Core { - private _parser: Parser; - ready: Promise<void>; + private parser: Parser; + public ready: Promise<void>; constructor() { if (this.constructor === RawCore) { throw new YomikunError("RawCore instantiated! Use DirectCoreClient instead!"); } - this._parser = new Parser(); + this.parser = new Parser(); this.ready = new Promise(async resolve => { - await this._parser.ready; + await this.parser.ready; resolve(); }) } - async parseSentence(input: string) { - return await this._parser.parse(input); + async parseSentence(input: string, options?: DeepPartial<InputSentenceProps>) { + return await this.parser.parse(input, options); } }; diff --git a/examples/sentence-word-lookup.ts b/examples/sentence-word-lookup.ts index 6fd57bd..d60ffbf 100644 --- a/examples/sentence-word-lookup.ts +++ b/examples/sentence-word-lookup.ts @@ -17,6 +17,7 @@ var sentence = await api.sentence("この紅茶は甘すぎる"); var word = sentence.words.find(w => w.writing == "紅茶"); // filter terms by writing (matches first only) // var word = sentence.first("紅茶"); // reference substring (matches first only) // var word = sentence.words[1]; // reference word index (depends on correct deconjugations/parsing) +console.log(word); // Fetch definitions for word var glossary = await word?.glossary(); diff --git a/language/parser.ts b/language/parser.ts index 27aa5ee..bc80637 100644 --- a/language/parser.ts +++ b/language/parser.ts @@ -3,7 +3,9 @@ import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts"; import DB from "../db/db.ts"; import "../util/array.ts"; import "../util/set.ts"; +import { DeepPartial } from "../util/types.ts"; +// TODO: rename Parser to Search /** @summary main Parser class */ export default class Parser { db: DB; @@ -18,28 +20,41 @@ export default class Parser { }); } - async parse(sentence: string, options?: InputSentenceProps): Promise<ParseResult> { + async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> { await this.ready; - let parseResult = await this.parseTerms(sentence, options); - if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult; - parseResult = await this.addGlossary(parseResult, options); - if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult; + + // initialize default options + var props: InputSentenceProps = { + lookahead: optional?.lookahead ?? 15, + depth: optional?.depth ?? ParseDepth.Term, + priorityMod: { + high: optional?.priorityMod?.high ?? 10, + low: optional?.priorityMod?.low ?? 0.1, + }, + breaks: optional?.breaks ?? [], + } + + let parseResult = await this.parseTerms(sentence, props); + if (props.depth <= ParseDepth.Term) return parseResult; + + parseResult = await this.addGlossary(parseResult, props); + if (props.depth <= ParseDepth.Term) return parseResult; + return parseResult; } /** @summary parse sentence into terms with readings */ - private async parseTerms(sentence: string, options?: InputSentenceProps): Promise<ParseResult> { - const MAX_LOOKAHEAD = options?.lookahead ?? 15; - const PRIORITY_MOD_HIGHER = options?.priorityMod?.high ?? 10; - const PRIORITY_MOD_LOWER = options?.priorityMod?.low ?? 0.1; - + private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> { var parseResult: ParseResult = { tokens: [], depth: ParseDepth.Term, + input: sentence, }; for (let start = 0; start < sentence.length; start++) { - var results = await this.db.findTerm(sentence.substring(start, start + MAX_LOOKAHEAD)); + var lookahead = options.lookahead; + + var results = await this.db.findTerm(sentence.substring(start, start + lookahead)); // current starting point did not yield results, try again at next character or until end of input if (results.length == 0) continue; @@ -75,12 +90,12 @@ export default class Parser { // give higher priority to suffixes when last token was a name, else lower priority if (result.tags.includes(Tag.Class.Suffix)) - result.sort *= lastTokenName ? PRIORITY_MOD_HIGHER : PRIORITY_MOD_LOWER; + result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low; // give lower priority to terms matched only by their readings, and are // usually written in kanji if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji) - result.sort *= PRIORITY_MOD_LOWER; + result.sort *= options.priorityMod.low; return result; }); @@ -105,6 +120,7 @@ export default class Parser { tags: result.tags, term_id: result.id, source: result.original, + start: start, }); start += result.original.length - 1; // -1 because loop already increments start @@ -113,7 +129,7 @@ export default class Parser { return parseResult; } - private async addGlossary(input: ParseResult, options?: InputSentenceProps): Promise<ParseResult> { + private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> { // TODO: annotate input with glossaries from DB options; // prevent unused warning return input; diff --git a/language/types.ts b/language/types.ts index 40ea4ba..1aeb9da 100644 --- a/language/types.ts +++ b/language/types.ts @@ -25,25 +25,29 @@ export interface ParseToken { glossary?: Glossary; term_id: number; source: string; + start: number; }; export interface ParseResult { depth: ParseDepth; - tokens: ParseToken[] + tokens: ParseToken[]; + input: string; }; /** @summary option struct for Parser */ export interface InputSentenceProps { /** @prop max amount of characters to look ahead when attempting to deconjugate */ - lookahead?: number; + lookahead: number; /** @prop amount of detail to return in search results */ - depth?: ParseDepth; + depth: ParseDepth; /** @prop search bias multipliers */ - priorityMod?: { + priorityMod: { /** @prop multiplier for negative bias */ - low?: number; + low: number; /** @prop multiplier for positive bias */ - high?: number; + high: number; }; + /** @prop list of breaks treated as delimiter */ + breaks: Array<number>; }; diff --git a/util/array.ts b/util/array.ts index 5b8c512..f032935 100644 --- a/util/array.ts +++ b/util/array.ts @@ -6,6 +6,8 @@ declare global { peek(): T; /** @summary create Set from this array */ set(): Set<T>; + /** @summary clear array */ + clear(): void; } } @@ -21,3 +23,7 @@ Array.prototype.set = function() { return new Set(this); } +Array.prototype.clear = function() { + while (this.length > 0) this.pop(); +} + diff --git a/util/types.ts b/util/types.ts new file mode 100644 index 0000000..f03a242 --- /dev/null +++ b/util/types.ts @@ -0,0 +1,8 @@ +/** + * @type DeepPartial<T> makes all properties of type T optional, but keep + * array types intact + */ +export type DeepPartial<T> = { + [K in keyof T]?: T[K] extends Array<infer U> ? Array<U> : DeepPartial<T[K]>; +}; + |