about summary refs log tree commit diff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-01 16:37:50 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-01 16:37:50 +0200
commit ce9e0788317b25e5d297ed38d9fed0754a341288 (patch)
tree 29563a39c73ded16cd93eb7b5c5664d1ece944ac
parent 8ff39cbe6300ca479584fe7d85ff03a1f65bc9b0 (diff)
WIP sentence API
-rw-r--r-- api/sentence-word.ts 3
-rw-r--r-- api/sentence.ts 38
-rw-r--r-- api/word.ts 1
-rw-r--r-- api/yomikun.ts 5
-rw-r--r-- core/api.ts 5
-rw-r--r-- core/http/client.ts 8
-rw-r--r-- core/http/server.ts 3
-rw-r--r-- core/http/types.ts 4
-rw-r--r-- core/raw/api.ts 14
-rw-r--r-- examples/sentence-word-lookup.ts 1
-rw-r--r-- language/parser.ts 44
-rw-r--r-- language/types.ts 16
-rw-r--r-- util/array.ts 6
-rw-r--r-- util/types.ts 8
14 files changed, 118 insertions, 38 deletions
diff --git a/api/sentence-word.ts b/api/sentence-word.ts
index a5bb9ca..6b2ef22 100644
--- a/api/sentence-word.ts
+++ b/api/sentence-word.ts
@@ -1,10 +1,11 @@
+import { ParseToken } from "../language/types.ts";
import Word from "./word.ts";
export default class SentenceWord extends Word {
public indexStart: number = 0;
public indexEnd: number = 0;
- constructor() {
+ constructor(source: string | ParseToken) {
super();
}
}
diff --git a/api/sentence.ts b/api/sentence.ts
index 6f67844..276a6c5 100644
--- a/api/sentence.ts
+++ b/api/sentence.ts
@@ -1,16 +1,50 @@
+import { ParseResult } from "../language/types.ts";
import APIBase from "./base.ts";
import SentenceWord from "./sentence-word.ts";
import Word from "./word.ts";
-import Yomikun from "./yomikun.ts";
export default class Sentence extends APIBase {
public words: Array<SentenceWord> = [];
+ protected query?: ParseResult;
+ protected original: string = "";
- constructor() {
+ public ready: Promise<void>;
+ private _resolveReady: () => void = () => {};
+
+ constructor(input: string) {
super();
+ this.ready = new Promise(res => this._resolveReady = res);
+ this.fetch(input);
}
first(searchValue: RegExp | string): Word | undefined {
return this.words[0];
}
+
+ private async fetch(input: string) {
+ this.original = input;
+ this.query = await (await this.api)["core"].parseSentence(input);
+ await this.updateWords();
+ this._resolveReady();
+ }
+
+ private async updateWords() {
+ this.words.clear();
+ let token = 0;
+ let i = 0;
+ while (i < this.original.length) {
+ this.words.push(new SentenceWord(this.query!.tokens[token]).withParent(await this.api));
+
+ i += this.query!.tokens[token].source.length;
+ if (i == this.original.length) break;
+ token++;
+
+ // continue if there are no unrecognized gaps between tokens
+ if (this.query!.tokens[token]?.start == i) continue;
+ var remainder = this.original.substring(i, this.query!.tokens[token]?.start);
+
+ this.words.push(new SentenceWord(remainder).withParent(await this.api));
+ i += remainder.length;
+ }
+ }
}
diff --git a/api/word.ts b/api/word.ts
index 3e8fce6..63dce10 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -1,5 +1,6 @@
import Glossary from "./glossary.ts";
import APIBase from "./base.ts";
+import { ParseToken } from "../language/types.ts";
export default class Word extends APIBase {
public writing = "TODO";
diff --git a/api/yomikun.ts b/api/yomikun.ts
index 4971567..60d49ba 100644
--- a/api/yomikun.ts
+++ b/api/yomikun.ts
@@ -1,5 +1,6 @@
import Core from "../core/api.ts";
import RemoteCoreClient from "../core/http/client.ts";
+import { ParseResult } from "../language/types.ts";
import Sentence from "./sentence.ts";
export default class Yomikun {
@@ -10,7 +11,9 @@ export default class Yomikun {
}
async sentence(input: string): Promise<Sentence> {
- return new Sentence().withParent(this);
+ var sentence = new Sentence(input).withParent(this);
+ await sentence.ready;
+ return sentence;
}
}
diff --git a/core/api.ts b/core/api.ts
index 51f976a..0720c8b 100644
--- a/core/api.ts
+++ b/core/api.ts
@@ -1,4 +1,5 @@
-import { ParseResult } from "../language/types.ts";
+import { InputSentenceProps, ParseResult } from "../language/types.ts";
+import { DeepPartial } from "../util/types.ts";
/**
* @summary Core interface
@@ -12,6 +13,6 @@ export default abstract class Core {
abstract ready: Promise<void>;
/** @summary parse sentence */
- abstract parseSentence(input: string): Promise<ParseResult>;
+ abstract parseSentence(input: string, options?: DeepPartial<InputSentenceProps>): Promise<ParseResult>;
};
diff --git a/core/http/client.ts b/core/http/client.ts
index 118e8f5..6b4e1a3 100644
--- a/core/http/client.ts
+++ b/core/http/client.ts
@@ -1,4 +1,6 @@
+import { InputSentenceProps } from "../../language/types.ts";
import "../../util/array.ts";
+import { DeepPartial } from "../../util/types.ts";
import Core from "../api.ts";
import { ConnectionProps, ConnectionPropsDefault } from "./props.ts";
@@ -30,12 +32,10 @@ export default class RemoteCoreClient implements Core {
return response.json();
}
- async parseSentence(input: string) {
+ async parseSentence(input: string, options?: DeepPartial<InputSentenceProps>) {
var request: CoreRequestParseSentence = {
command: "parseSentence",
- options: {
- input: input,
- },
+ options: { input, options, },
};
var { response } = await this.request(request) as CoreResponseParseSentence;
return response;
diff --git a/core/http/server.ts b/core/http/server.ts
index 0a9a082..7781a22 100644
--- a/core/http/server.ts
+++ b/core/http/server.ts
@@ -13,10 +13,11 @@ export default class RemoteCoreServer extends RawCore {
parseSentence: async _req => {
var req = _req as CoreRequestParseSentence;
var input = req.options?.input
+ var options = req.options?.options;
if (!input) return new Response("", { status: 404 });
return new Response(JSON.stringify({
command: "parseSentence",
- response: await this.parseSentence(input),
+ response: await this.parseSentence(input, options),
} as CoreResponseParseSentence));
},
};
diff --git a/core/http/types.ts b/core/http/types.ts
index af2cfea..3d55a98 100644
--- a/core/http/types.ts
+++ b/core/http/types.ts
@@ -1,4 +1,5 @@
-import { ParseResult } from "../../language/types.ts";
+import { InputSentenceProps, ParseResult } from "../../language/types.ts";
+import { DeepPartial } from "../../util/types.ts";
export interface CoreRequest {
command: string;
@@ -9,6 +10,7 @@ export interface CoreRequestParseSentence extends CoreRequest {
command: "parseSentence";
options: {
input: string;
+ options?: DeepPartial<InputSentenceProps>;
};
};
diff --git a/core/raw/api.ts b/core/raw/api.ts
index f47dead..593b932 100644
--- a/core/raw/api.ts
+++ b/core/raw/api.ts
@@ -1,27 +1,29 @@
import Core from "../api.ts";
import Parser from "../../language/parser.ts";
import YomikunError from "../../util/error.ts";
+import { DeepPartial } from "../../util/types.ts";
+import { InputSentenceProps } from "../../language/types.ts";
/** @summary internal Core (DO NOT USE DIRECTLY) */
export default class RawCore implements Core {
- private _parser: Parser;
- ready: Promise<void>;
+ private parser: Parser;
+ public ready: Promise<void>;
constructor() {
if (this.constructor === RawCore) {
throw new YomikunError("RawCore instantiated! Use DirectCoreClient instead!");
}
- this._parser = new Parser();
+ this.parser = new Parser();
this.ready = new Promise(async resolve => {
- await this._parser.ready;
+ await this.parser.ready;
resolve();
})
}
- async parseSentence(input: string) {
- return await this._parser.parse(input);
+ async parseSentence(input: string, options?: DeepPartial<InputSentenceProps>) {
+ return await this.parser.parse(input, options);
}
};
diff --git a/examples/sentence-word-lookup.ts b/examples/sentence-word-lookup.ts
index 6fd57bd..d60ffbf 100644
--- a/examples/sentence-word-lookup.ts
+++ b/examples/sentence-word-lookup.ts
@@ -17,6 +17,7 @@ var sentence = await api.sentence("この紅茶は甘すぎる");
var word = sentence.words.find(w => w.writing == "紅茶"); // filter terms by writing (matches first only)
// var word = sentence.first("紅茶"); // reference substring (matches first only)
// var word = sentence.words[1]; // reference word index (depends on correct deconjugations/parsing)
+console.log(word);
// Fetch definitions for word
var glossary = await word?.glossary();
diff --git a/language/parser.ts b/language/parser.ts
index 27aa5ee..bc80637 100644
--- a/language/parser.ts
+++ b/language/parser.ts
@@ -3,7 +3,9 @@ import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts";
import DB from "../db/db.ts";
import "../util/array.ts";
import "../util/set.ts";
+import { DeepPartial } from "../util/types.ts";
+// TODO: rename Parser to Search
/** @summary main Parser class */
export default class Parser {
db: DB;
@@ -18,28 +20,41 @@ export default class Parser {
});
}
- async parse(sentence: string, options?: InputSentenceProps): Promise<ParseResult> {
+ async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> {
await this.ready;
- let parseResult = await this.parseTerms(sentence, options);
- if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult;
- parseResult = await this.addGlossary(parseResult, options);
- if ((options?.depth || ParseDepth.Term) <= ParseDepth.Term) return parseResult;
+
+ // initialize default options
+ var props: InputSentenceProps = {
+ lookahead: optional?.lookahead ?? 15,
+ depth: optional?.depth ?? ParseDepth.Term,
+ priorityMod: {
+ high: optional?.priorityMod?.high ?? 10,
+ low: optional?.priorityMod?.low ?? 0.1,
+ },
+ breaks: optional?.breaks ?? [],
+ }
+
+ let parseResult = await this.parseTerms(sentence, props);
+ if (props.depth <= ParseDepth.Term) return parseResult;
+
+ parseResult = await this.addGlossary(parseResult, props);
+ if (props.depth <= ParseDepth.Term) return parseResult;
+
return parseResult;
}
/** @summary parse sentence into terms with readings */
- private async parseTerms(sentence: string, options?: InputSentenceProps): Promise<ParseResult> {
- const MAX_LOOKAHEAD = options?.lookahead ?? 15;
- const PRIORITY_MOD_HIGHER = options?.priorityMod?.high ?? 10;
- const PRIORITY_MOD_LOWER = options?.priorityMod?.low ?? 0.1;
-
+ private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> {
var parseResult: ParseResult = {
tokens: [],
depth: ParseDepth.Term,
+ input: sentence,
};
for (let start = 0; start < sentence.length; start++) {
- var results = await this.db.findTerm(sentence.substring(start, start + MAX_LOOKAHEAD));
+ var lookahead = options.lookahead;
+
+ var results = await this.db.findTerm(sentence.substring(start, start + lookahead));
// current starting point did not yield results, try again at next character or until end of input
if (results.length == 0) continue;
@@ -75,12 +90,12 @@ export default class Parser {
// give higher priority to suffixes when last token was a name, else lower priority
if (result.tags.includes(Tag.Class.Suffix))
- result.sort *= lastTokenName ? PRIORITY_MOD_HIGHER : PRIORITY_MOD_LOWER;
+ result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low;
// give lower priority to terms matched only by their readings, and are
// usually written in kanji
if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji)
- result.sort *= PRIORITY_MOD_LOWER;
+ result.sort *= options.priorityMod.low;
return result;
});
@@ -105,6 +120,7 @@ export default class Parser {
tags: result.tags,
term_id: result.id,
source: result.original,
+ start: start,
});
start += result.original.length - 1; // -1 because loop already increments start
@@ -113,7 +129,7 @@ export default class Parser {
return parseResult;
}
- private async addGlossary(input: ParseResult, options?: InputSentenceProps): Promise<ParseResult> {
+ private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> {
// TODO: annotate input with glossaries from DB
options; // prevent unused warning
return input;
diff --git a/language/types.ts b/language/types.ts
index 40ea4ba..1aeb9da 100644
--- a/language/types.ts
+++ b/language/types.ts
@@ -25,25 +25,29 @@ export interface ParseToken {
glossary?: Glossary;
term_id: number;
source: string;
+ start: number;
};
export interface ParseResult {
depth: ParseDepth;
- tokens: ParseToken[]
+ tokens: ParseToken[];
+ input: string;
};
/** @summary option struct for Parser */
export interface InputSentenceProps {
/** @prop max amount of characters to look ahead when attempting to deconjugate */
- lookahead?: number;
+ lookahead: number;
/** @prop amount of detail to return in search results */
- depth?: ParseDepth;
+ depth: ParseDepth;
/** @prop search bias multipliers */
- priorityMod?: {
+ priorityMod: {
/** @prop multiplier for negative bias */
- low?: number;
+ low: number;
/** @prop multiplier for positive bias */
- high?: number;
+ high: number;
};
+ /** @prop list of breaks treated as delimiter */
+ breaks: Array<number>;
};
diff --git a/util/array.ts b/util/array.ts
index 5b8c512..f032935 100644
--- a/util/array.ts
+++ b/util/array.ts
@@ -6,6 +6,8 @@ declare global {
peek(): T;
/** @summary create Set from this array */
set(): Set<T>;
+ /** @summary clear array */
+ clear(): void;
}
}
@@ -21,3 +23,7 @@ Array.prototype.set = function() {
return new Set(this);
}
+Array.prototype.clear = function() {
+ while (this.length > 0) this.pop();
+}
+
diff --git a/util/types.ts b/util/types.ts
new file mode 100644
index 0000000..f03a242
--- /dev/null
+++ b/util/types.ts
@@ -0,0 +1,8 @@
+/**
+ * @type DeepPartial<T> makes all properties of type T optional, but keep
+ * array types intact
+ */
+export type DeepPartial<T> = {
+ [K in keyof T]?: T[K] extends Array<infer U> ? Array<U> : DeepPartial<T[K]>;
+};
+