aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--api/sentence.ts16
-rw-r--r--api/word.ts8
-rw-r--r--api/yomikun.ts1
-rw-r--r--core/api.ts35
-rw-r--r--core/http/client.ts41
-rw-r--r--core/http/types.ts35
-rw-r--r--core/raw/api.ts30
-rw-r--r--db/db.ts6
-rw-r--r--db/dict/deinflections.sql53
-rw-r--r--db/find.sql2
-rw-r--r--language/parser.ts144
-rw-r--r--language/types.ts49
-rw-r--r--makefile9
-rw-r--r--readme.md2
-rw-r--r--search/readme.md (renamed from language/readme.md)4
-rw-r--r--search/search.ts141
-rw-r--r--search/tags.ts (renamed from language/tags.ts)4
-rw-r--r--search/types.ts60
-rw-r--r--test/deinflection/cases.ts16
-rw-r--r--test/deinflection/test.ts17
-rw-r--r--util/string.ts2
21 files changed, 373 insertions, 302 deletions
diff --git a/api/sentence.ts b/api/sentence.ts
index cde66a5..1d22be3 100644
--- a/api/sentence.ts
+++ b/api/sentence.ts
@@ -1,11 +1,11 @@
-import { ParseResult } from "../language/types.ts";
+import { SearchSentenceResult } from "../search/types.ts";
import APIBase from "./base.ts";
import { JapaneseFormatter } from "./japanese.ts";
import Word from "./word.ts";
export default class Sentence extends APIBase {
public words: Array<Word> = [];
- protected query?: ParseResult;
+ protected query?: SearchSentenceResult;
protected original: string = "";
public ready: Promise<void>;
@@ -23,7 +23,7 @@ export default class Sentence extends APIBase {
private async fetch(input: string) {
this.original = input;
- this.query = await (await this.api)["core"].parseSentence(input);
+ this.query = await (await this.api)["core"].search.sentence(input);
await this.updateWords();
this._resolveReady();
}
@@ -33,15 +33,15 @@ export default class Sentence extends APIBase {
let token = 0;
let i = 0;
while (i < this.original.length) {
- this.words.push(new Word(this.query!.tokens[token]).withParent(await this.api));
+ this.words.push(new Word(this.query!.words[token]).withParent(await this.api));
- i += this.query!.tokens[token].source.length;
+ i += this.query!.words[token].source.length;
if (i == this.original.length) break;
token++;
- // continue if there are no unrecognized gaps between tokens
- if (this.query!.tokens[token]?.start == i) continue;
- var remainder = this.original.substring(i, this.query!.tokens[token]?.start);
+ // continue if there are no unrecognized gaps between words
+ if (this.query!.words[token]?.start == i) continue;
+ var remainder = this.original.substring(i, this.query!.words[token]?.start);
this.words.push(new Word(remainder).withParent(await this.api));
i += remainder.length;
diff --git a/api/word.ts b/api/word.ts
index b7fc3e6..4dad4a3 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -1,10 +1,10 @@
import Glossary from "./glossary.ts";
import APIBase from "./base.ts";
-import { ParseToken } from "../language/types.ts";
import Japanese, { JapaneseFormatter } from "./japanese.ts";
import "../util/string.ts";
-import { Tag, TagGroup } from "../language/tags.ts";
+import { TagGroup } from "../search/tags.ts";
+import { SearchWord } from "../search/types.ts";
export default class Word extends APIBase {
/** @prop dictionary form of verb if this word is a verb */
@@ -16,7 +16,7 @@ export default class Word extends APIBase {
/** @prop this word represents an unrecognized sentence part between recognized terms */
protected filler: boolean;
- constructor(input: string | ParseToken) {
+ constructor(input: string | SearchWord) {
super();
if (typeof input === "string") {
this.filler = true;
@@ -26,7 +26,7 @@ export default class Word extends APIBase {
this.outputKanji = false;
} else {
this.filler = false;
- input = input as ParseToken;
+ input = input as SearchWord;
this.base = new Japanese(input.writing, input.reading);
if (input.tags.anyOf(TagGroup.Conjugable as string[])) {
var writingCommon = input.writing.cmpLen(input.source);
diff --git a/api/yomikun.ts b/api/yomikun.ts
index a7f214e..696361f 100644
--- a/api/yomikun.ts
+++ b/api/yomikun.ts
@@ -1,6 +1,5 @@
import Core from "../core/api.ts";
import RemoteCoreClient from "../core/http/client.ts";
-import { ParseResult } from "../language/types.ts";
import Sentence from "./sentence.ts";
export default class Yomikun {
diff --git a/core/api.ts b/core/api.ts
index 0720c8b..77195b2 100644
--- a/core/api.ts
+++ b/core/api.ts
@@ -1,6 +1,33 @@
-import { InputSentenceProps, ParseResult } from "../language/types.ts";
+import { SearchSentenceProps, SearchSentenceResult, SearchTermResult } from "../search/types.ts";
import { DeepPartial } from "../util/types.ts";
+/** @interface search-related functions */
+export interface CoreSearch {
+ terms(term: string): Promise<Array<SearchTermResult>>;
+ sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult>;
+ // glossary: (input: string) => Promise<void>;
+};
+
+/** @interface user management */
+export interface CoreUser {
+ // TODO: list
+ // TODO: add
+ // TODO: remove
+ // TODO: get info
+};
+
+/** @interface dictionary/user data import functions */
+export interface CoreImport {
+ // TODO: import dictionary
+ // TODO: import user preferences
+};
+
+/** @interface dictionary/user data export functions */
+export interface CoreExport {
+ // TODO: export dictionary
+ // TODO: export user preferences
+};
+
/**
* @summary Core interface
*
@@ -12,7 +39,9 @@ export default abstract class Core {
/** @summary resolved when ready */
abstract ready: Promise<void>;
- /** @summary parse sentence */
- abstract parseSentence(input: string, options?: DeepPartial<InputSentenceProps>): Promise<ParseResult>;
+ abstract search: CoreSearch;
+ abstract user: CoreUser;
+ abstract import: CoreImport;
+ abstract export: CoreExport;
};
diff --git a/core/http/client.ts b/core/http/client.ts
index 6b4e1a3..80f77b3 100644
--- a/core/http/client.ts
+++ b/core/http/client.ts
@@ -1,10 +1,8 @@
-import { InputSentenceProps } from "../../language/types.ts";
import "../../util/array.ts";
-import { DeepPartial } from "../../util/types.ts";
-import Core from "../api.ts";
+import Core, { CoreExport, CoreImport, CoreSearch, CoreUser } from "../api.ts";
import { ConnectionProps, ConnectionPropsDefault } from "./props.ts";
-import { CoreRequest, CoreRequestParseSentence, CoreResponseParseSentence } from "./types.ts";
+import { CoreRequest, CoreRequestSearchSentence, CoreRequestSearchTerms, CoreResponseSearchSentence, CoreResponseSearchTerms } from "./types.ts";
/**
* @summary HTTP Core client
@@ -13,8 +11,9 @@ import { CoreRequest, CoreRequestParseSentence, CoreResponseParseSentence } from
* (de)serialization automatically.
*/
export default class RemoteCoreClient implements Core {
+ public ready: Promise<void> = Promise.resolve();
+
private props: ConnectionProps;
- ready: Promise<void> = Promise.resolve();
constructor(options?: ConnectionProps) {
this.props = { ...ConnectionPropsDefault, ...options };
@@ -32,13 +31,29 @@ export default class RemoteCoreClient implements Core {
return response.json();
}
- async parseSentence(input: string, options?: DeepPartial<InputSentenceProps>) {
- var request: CoreRequestParseSentence = {
- command: "parseSentence",
- options: { input, options, },
- };
- var { response } = await this.request(request) as CoreResponseParseSentence;
- return response;
- }
+ public search: CoreSearch = {
+ terms: async term => {
+ var request: CoreRequestSearchTerms = {
+ command: "search.terms",
+ options: { term, },
+ };
+ var { response } = await this.request(request) as CoreResponseSearchTerms;
+ return response;
+ },
+ sentence: async (sentence, optional?) => {
+ var request: CoreRequestSearchSentence = {
+ command: "search.sentence",
+ options: { sentence, optional, },
+ };
+ var { response } = await this.request(request) as CoreResponseSearchSentence;
+ return response;
+ },
+ };
+
+ public user: CoreUser = {};
+
+ public import: CoreImport = {};
+
+ public export: CoreExport = {};
}
diff --git a/core/http/types.ts b/core/http/types.ts
index 3d55a98..51c221a 100644
--- a/core/http/types.ts
+++ b/core/http/types.ts
@@ -1,4 +1,4 @@
-import { InputSentenceProps, ParseResult } from "../../language/types.ts";
+import { SearchTermResult, SearchSentenceResult, SearchSentenceProps } from "../../search/types.ts";
import { DeepPartial } from "../../util/types.ts";
export interface CoreRequest {
@@ -6,22 +6,33 @@ export interface CoreRequest {
options: any;
};
-export interface CoreRequestParseSentence extends CoreRequest {
- command: "parseSentence";
- options: {
- input: string;
- options?: DeepPartial<InputSentenceProps>;
- };
-};
-
export interface CoreResponse {
command: string;
response: any;
// final: boolean;
};
-export interface CoreResponseParseSentence extends CoreResponse {
- command: "parseSentence";
- response: ParseResult;
+export interface CoreRequestSearchSentence extends CoreRequest {
+ command: "search.sentence";
+ options: {
+ sentence: string;
+ optional?: DeepPartial<SearchSentenceProps>;
+ };
};
+export interface CoreResponseSearchSentence extends CoreResponse {
+ command: "search.sentence";
+ response: SearchSentenceResult;
+};
+
+export interface CoreRequestSearchTerms extends CoreRequest {
+ command: "search.terms";
+ options: {
+ term: string;
+ };
+};
+
+export interface CoreResponseSearchTerms extends CoreResponse {
+ command: "search.terms";
+ response: Array<SearchTermResult>;
+};
diff --git a/core/raw/api.ts b/core/raw/api.ts
index 593b932..6046a26 100644
--- a/core/raw/api.ts
+++ b/core/raw/api.ts
@@ -1,29 +1,39 @@
-import Core from "../api.ts";
-import Parser from "../../language/parser.ts";
+import Core, { CoreExport, CoreImport, CoreSearch, CoreUser } from "../api.ts";
import YomikunError from "../../util/error.ts";
-import { DeepPartial } from "../../util/types.ts";
-import { InputSentenceProps } from "../../language/types.ts";
+import Search from "../../search/search.ts";
/** @summary internal Core (DO NOT USE DIRECTLY) */
export default class RawCore implements Core {
- private parser: Parser;
public ready: Promise<void>;
+ private _search: Search;
+
constructor() {
if (this.constructor === RawCore) {
throw new YomikunError("RawCore instantiated! Use DirectCoreClient instead!");
}
- this.parser = new Parser();
+ this._search = new Search();
this.ready = new Promise(async resolve => {
- await this.parser.ready;
+ await this._search.ready;
resolve();
})
}
- async parseSentence(input: string, options?: DeepPartial<InputSentenceProps>) {
- return await this.parser.parse(input, options);
- }
+ public search: CoreSearch = {
+ terms: async term => {
+ return await this._search.terms(term);
+ },
+ sentence: async (sentence, optional?) => {
+ return await this._search.sentence(sentence, optional);
+ },
+ };
+
+ public user: CoreUser = {};
+
+ public import: CoreImport = {};
+
+ public export: CoreExport = {};
};
diff --git a/db/db.ts b/db/db.ts
index 5605f40..7bb315c 100644
--- a/db/db.ts
+++ b/db/db.ts
@@ -1,7 +1,7 @@
import { Database, Statement } from "https://deno.land/x/sqlite3@0.9.1/mod.ts";
import * as path from 'https://deno.land/std@0.102.0/path/mod.ts';
-import { TokenTags } from "../language/tags.ts";
+import { TokenTags } from "../search/tags.ts";
import "../util/string.ts";
export interface DBDictInfo {
@@ -20,7 +20,7 @@ export interface FindResult {
depth: number;
original: string;
match: {
- kanji: boolean;
+ writing: boolean;
reading: boolean;
};
}
@@ -97,7 +97,7 @@ export default class DB {
depth: term.depth,
original: term.original,
match: {
- kanji: term.expression == term.deinflected,
+ writing: term.expression == term.deinflected,
reading: term.reading == term.deinflected,
},
};
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index a79fff0..e768d33 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -126,20 +126,20 @@ insert into deinflection_temp values
('infl:tense:cont', 'いる', '', 'a', 'a'),
-- potential form <https://guidetojapanese.org/learn/grammar/potential>
- ('infl:potential', 'られる', 'る', 'a', 'ru'),
- ('infl:potential', 'える', 'う', 'a', 'u'),
- ('infl:potential', 'ける', 'く', 'a', 'u'),
- ('infl:potential', 'げる', 'ぐ', 'a', 'u'),
- ('infl:potential', 'せる', 'す', 'a', 'u'),
- ('infl:potential', 'てる', 'つ', 'a', 'u'),
- ('infl:potential', 'ねる', 'ぬ', 'a', 'u'),
- ('infl:potential', 'べる', 'ぶ', 'a', 'u'),
- ('infl:potential', 'める', 'む', 'a', 'u'),
- ('infl:potential', 'れる', 'る', 'a', 'u'),
- ('infl:potential', 'できる', 'する', 'a', 's'),
- ('infl:potential', 'こられる', 'くる', 'a', 'k'),
- ('infl:potential', 'ありうる', 'ある', 'a', ''), -- exception
- ('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception
+ ('infl:potential', 'られる', 'る', 'ru', 'ru'),
+ ('infl:potential', 'える', 'う', 'ru', 'u'),
+ ('infl:potential', 'ける', 'く', 'ru', 'u'),
+ ('infl:potential', 'げる', 'ぐ', 'ru', 'u'),
+ ('infl:potential', 'せる', 'す', 'ru', 'u'),
+ ('infl:potential', 'てる', 'つ', 'ru', 'u'),
+ ('infl:potential', 'ねる', 'ぬ', 'ru', 'u'),
+ ('infl:potential', 'べる', 'ぶ', 'ru', 'u'),
+ ('infl:potential', 'める', 'む', 'ru', 'u'),
+ ('infl:potential', 'れる', 'る', 'ru', 'u'),
+ ('infl:potential', 'できる', 'する', 'ru', 's'),
+ ('infl:potential', 'こられる', 'くる', 'ru', 'k'),
+ ('infl:potential', 'ありうる', 'ある', 'ru', ''), -- exception
+ ('infl:potential', 'ありえる', 'ある', 'ru', ''), -- exception
-- conditionals <https://guidetojapanese.org/learn/grammar/conditionals>
('infl:cond:ba', 'えば', 'う', 'nt', 'u'),
@@ -271,19 +271,18 @@ insert into deinflection_temp values
('infl:causative', '来さす', '来る', 'a', 'k'),
-- passive <https://guidetojapanese.org/learn/grammar/causepass>
- ('infl:passive', 'られる', 'る', 'a', 'ru'),
- ('infl:passive', 'われる', 'う', 'a', 'u'),
- ('infl:passive', 'かれる', 'く', 'a', 'u'),
- ('infl:passive', 'がれる', 'ぐ', 'a', 'u'),
- ('infl:passive', 'される', 'す', 'a', 'u'),
- ('infl:passive', 'たれる', 'つ', 'a', 'u'),
- ('infl:passive', 'なれる', 'ぬ', 'a', 'u'),
- ('infl:passive', 'ばれる', 'ぶ', 'a', 'u'),
- ('infl:passive', 'まれる', 'む', 'a', 'u'),
- ('infl:passive', 'られる', 'る', 'a', 'u'),
- ('infl:passive', 'される', 'する', 'a', 's'),
- ('infl:passive', 'こられる', 'くる', 'a', 'k'),
- ('infl:passive', '来られる', '来る', 'a', 'k'),
+ ('infl:passive', 'られる', 'る', 'ru', 'ru u'),
+ ('infl:passive', 'われる', 'う', 'ru', 'u'),
+ ('infl:passive', 'かれる', 'く', 'ru', 'u'),
+ ('infl:passive', 'がれる', 'ぐ', 'ru', 'u'),
+ ('infl:passive', 'される', 'す', 'ru', 'u'),
+ ('infl:passive', 'たれる', 'つ', 'ru', 'u'),
+ ('infl:passive', 'なれる', 'ぬ', 'ru', 'u'),
+ ('infl:passive', 'ばれる', 'ぶ', 'ru', 'u'),
+ ('infl:passive', 'まれる', 'む', 'ru', 'u'),
+ ('infl:passive', 'される', 'する', 'ru', 's'),
+ ('infl:passive', 'こられる', 'くる', 'ru', 'k'),
+ ('infl:passive', '来られる', '来る', 'ru', 'k'),
-- auxiliary rules
('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
diff --git a/db/find.sql b/db/find.sql
index dd6a011..e2d6ad8 100644
--- a/db/find.sql
+++ b/db/find.sql
@@ -47,7 +47,7 @@ with results(id, expression, reading, tags, depth, rules, original, deinflected)
(substr(term, length(term) - length(kana_in) + 1) = kana_in) and
-- can't deconjugate to length <1
(length(term) > 0)
- limit 50 -- failsafe to catch any infinite loops
+ limit 100 -- failsafe to catch any infinite loops
)
select term, tags, depth, substr(:term, 1, deinflect.length), rules
from deinflect
diff --git a/language/parser.ts b/language/parser.ts
deleted file mode 100644
index 7fd3981..0000000
--- a/language/parser.ts
+++ /dev/null
@@ -1,144 +0,0 @@
-import { Tag, TagGroup } from "./tags.ts";
-import { ParseResult, InputSentenceProps, ParseDepth } from "./types.ts";
-import DB from "../db/db.ts";
-import "../util/array.ts";
-import "../util/set.ts";
-import { DeepPartial } from "../util/types.ts";
-
-// TODO: rename Parser to Search
-/** @summary main Parser class */
-export default class Parser {
- db: DB;
- ready: Promise<void>;
-
- constructor() {
- this.db = new DB();
-
- this.ready = new Promise<void>(async resolve => {
- await this.db.ready;
- resolve();
- });
- }
-
- // Search.sentence()
- async parse(sentence: string, optional?: DeepPartial<InputSentenceProps>): Promise<ParseResult> {
- await this.ready;
-
- // initialize default options
- var props: InputSentenceProps = {
- lookahead: optional?.lookahead ?? 15,
- depth: optional?.depth ?? ParseDepth.Term,
- priorityMod: {
- high: optional?.priorityMod?.high ?? 10,
- low: optional?.priorityMod?.low ?? -10,
- },
- breaks: optional?.breaks ?? [],
- }
-
- let parseResult = await this.parseTerms(sentence, props);
- if (props.depth <= ParseDepth.Term) return parseResult;
-
- parseResult = await this.addGlossary(parseResult, props);
- if (props.depth <= ParseDepth.Term) return parseResult;
-
- return parseResult;
- }
-
- /** @summary parse sentence into terms with readings */
- private async parseTerms(sentence: string, options: InputSentenceProps): Promise<ParseResult> {
- var parseResult: ParseResult = {
- tokens: [],
- depth: ParseDepth.Term,
- input: sentence,
- };
-
- for (let start = 0; start < sentence.length; start++) {
- var lookahead = options.lookahead;
-
- var results = await this.db.findTerm(sentence.substring(start, start + lookahead));
- // current starting point did not yield results, try again at next character or until end of input
- if (results.length == 0) continue;
-
- results = results.filter(result => {
- // ignore ignored by user terms
- if (result.sort < 0) return false;
-
- // deconjugated words
- if (result.depth > 0) {
- // check if this word can be conjugated at all
- if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;
-
- // ignore other wrong deconjugations
- if (result.tags.includes(Tag.Class.Verb.U) &&
- !result.tags.includes(Tag.Inflection.Reason.U)) return false;
- if (result.tags.includes(Tag.Class.Verb.Ru) &&
- !result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
- if (result.tags.includes(Tag.Class.Verb.Suru) &&
- !result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
- if (result.tags.includes(Tag.Class.Adjective.I) &&
- !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
- if (result.tags.includes(Tag.Class.Adjective.Na) &&
- !result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
- }
-
- // all other results should be valid grammatically
- return true;
- });
-
- // no valid results left after filter, try again at next character or until end of input
- if (results.length == 0) continue;
-
- // bias search results by modifying sort value
- results = results.map(result => {
- // true if last token was a name else false
- const lastTokenName = parseResult.tokens.peek()?.tags.anyOf(Object.values(Tag.Name));
-
- // give higher priority to suffixes when last token was a name, else lower priority
- if (result.tags.includes(Tag.Class.Suffix))
- result.sort += lastTokenName ? options.priorityMod.high : options.priorityMod.low;
-
- // give lower priority to terms matched only by their readings, and are
- // usually written in kanji
- if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji)
- result.sort += options.priorityMod.low;
-
- return result;
- });
-
-
- results.sort((a, b) => {
- // sort by original string length (long to short)
- if (a.original.length != b.original.length) return b.original.length - a.original.length;
- // then by sort index (high to low)
- if (a.sort != b.sort) return b.sort - a.sort;
- // then by depth (high to low)
- if (a.depth != b.depth) return b.depth - a.depth;
- // else keep current order (random)
- return 0;
- });
-
- // pick top result
- const result = results[0];
-
- parseResult.tokens.push({
- writing: result.expression,
- reading: result.reading,
- tags: result.tags,
- term_id: result.id,
- source: result.original,
- start: start,
- });
-
- start += result.original.length - 1; // -1 because loop already increments start
- continue; // extra verbose end of iteration
- }
- return parseResult;
- }
-
- private async addGlossary(input: ParseResult, options: InputSentenceProps): Promise<ParseResult> {
- // TODO: annotate input with glossaries from DB
- options; // prevent unused warning
- return input;
- }
-};
-
diff --git a/language/types.ts b/language/types.ts
deleted file mode 100644
index d3585f8..0000000
--- a/language/types.ts
+++ /dev/null
@@ -1,49 +0,0 @@
-import { TokenTags } from "./tags.ts";
-
-export enum ParseDepth {
- Term,
- Glossary,
-};
-
-export interface GlossaryDefinition {
-
-};
-
-export interface Glossary {
- id: number;
- definitions: GlossaryDefinition[];
-};
-
-export interface ParseToken {
- writing: string;
- reading: string;
- tags: TokenTags;
- glossary?: Glossary;
- term_id: number;
- source: string;
- start: number;
-};
-
-export interface ParseResult {
- depth: ParseDepth;
- tokens: ParseToken[];
- input: string;
-};
-
-/** @summary option struct for Parser */
-export interface InputSentenceProps {
- /** @prop max amount of characters to look ahead when attempting to deconjugate */
- lookahead: number;
- /** @prop amount of detail to return in search results */
- depth: ParseDepth;
- /** @prop search bias multipliers */
- priorityMod: {
- /** @prop multiplier for negative bias */
- low: number;
- /** @prop multiplier for positive bias */
- high: number;
- };
- /** @prop list of breaks treated as delimiter */
- breaks: Array<number>;
-};
-
diff --git a/makefile b/makefile
index 6136ad6..f32cbcf 100644
--- a/makefile
+++ b/makefile
@@ -1,14 +1,7 @@
TARGET = yomikun
+# TODO: figure out distribution
SRCS += ./main.ts
-SRCS += ./core/yomikun.ts
-SRCS += ./db/db.ts
-SRCS += ./language/tags.ts
-SRCS += ./language/translator.ts
-SRCS += ./language/types.ts
-SRCS += ./util/array.ts
-SRCS += ./util/error.ts
-SRCS += ./util/string.ts
DENO_FLAGS += --unstable
DENO_FLAGS += --allow-ffi
diff --git a/readme.md b/readme.md
index 60f1eda..f27e247 100644
--- a/readme.md
+++ b/readme.md
@@ -29,6 +29,8 @@ scope is larger than Yomichan, it's still focused on Japanese only.**
- [ ] create primitive search page ui
- [ ] add code formatter config
- [ ] complete documentation
+- [ ] remove makefiles for database initialization
+- [ ] replace .sql script files with typescript sql query generation library
## ~New features (from Yomichan)~
diff --git a/language/readme.md b/search/readme.md
index 99a7d69..400c8ce 100644
--- a/language/readme.md
+++ b/search/readme.md
@@ -1,7 +1,7 @@
-# Language
+# Search
This directory contains files that provide an abstracted interface with the
-database for looking up sentences ~and words~.
+database for looking up sentences and words.
## Tags
diff --git a/search/search.ts b/search/search.ts
new file mode 100644
index 0000000..0a50773
--- /dev/null
+++ b/search/search.ts
@@ -0,0 +1,141 @@
+import { Tag, TagGroup } from "./tags.ts";
+import { SearchSentenceProps, SearchSentenceResult, SearchTermResult, SearchWord } from "./types.ts";
+import DB from "../db/db.ts";
+import "../util/array.ts";
+import "../util/set.ts";
+import { DeepPartial } from "../util/types.ts";
+
+/** @summary main Search class */
+export default class Search {
+ db: DB;
+ ready: Promise<void>;
+
+ constructor() {
+ this.db = new DB();
+
+ this.ready = new Promise<void>(async resolve => {
+ await this.db.ready;
+ resolve();
+ });
+ }
+
+ /** @summary find possible terms at start of string by deconjugating */
+ public async terms(term: string): Promise<Array<SearchTermResult>> {
+ await this.ready;
+
+ var results = await this.db.findTerm(term);
+
+ // skip filtering valid results if there are none
+ if (results.length == 0) return [];
+
+ // filter invalid deconjugations/results
+ results = results.filter(result => {
+ // ignore ignored by user terms
+ if (result.sort < 0) return false;
+
+ // deconjugated words
+ if (result.depth > 0) {
+ // check if this word can be conjugated at all
+ if (!result.tags.anyOf(TagGroup.Conjugable as string[])) return false;
+
+ // ignore other wrong deconjugations
+ if (result.tags.includes(Tag.Class.Verb.U) &&
+ !result.tags.includes(Tag.Inflection.Reason.U)) return false;
+ if (result.tags.includes(Tag.Class.Verb.Ru) &&
+ !result.tags.includes(Tag.Inflection.Reason.Ru)) return false;
+ if (result.tags.includes(Tag.Class.Verb.Suru) &&
+ !result.tags.includes(Tag.Inflection.Reason.Suru)) return false;
+ if (result.tags.includes(Tag.Class.Adjective.I) &&
+ !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false;
+ if (result.tags.includes(Tag.Class.Adjective.Na) &&
+ !result.tags.includes(Tag.Inflection.Reason.Adjective.Na)) return false;
+ }
+
+ // all other results should be valid
+ return true;
+ });
+
+ return results.map(result => ({
+ id: result.id,
+ writing: result.expression,
+ reading: result.reading,
+ tags: result.tags,
+ source: result.original,
+ sort: result.sort,
+ depth: result.depth,
+ match: {
+ reading: result.match.reading,
+ writing: result.match.writing,
+ },
+ }));
+ }
+
+ /** @summary parse sentence into terms with readings */
+ public async sentence(sentence: string, optional?: DeepPartial<SearchSentenceProps>): Promise<SearchSentenceResult> {
+ await this.ready;
+
+ var props: SearchSentenceProps = {
+ lookahead: optional?.lookahead ?? 15,
+ priorityMod: {
+ high: optional?.priorityMod?.high ?? 10,
+ low: optional?.priorityMod?.low ?? -10,
+ },
+ breaks: optional?.breaks ?? [],
+ }
+
+ var parseResult: SearchSentenceResult = {
+ input: sentence,
+ words: [],
+ };
+
+ for (let start = 0; start < sentence.length; start++) {
+ var lookahead = props.lookahead; // TODO: stop at next delimiter (optimization)
+ var term = sentence.substring(start, start + lookahead);
+ var results = (await this.terms(term)).map(term => {
+ var word = term as SearchWord;
+ word.start = start;
+ return word;
+ });
+
+ // current starting point did not yield results, try again at next character or until end of input
+ if (results.length == 0) continue;
+
+ // bias search results by modifying sort value
+ results = results.map(result => {
+ // true if last token was a name else false
+ const lastTokenName = parseResult.words.peek()?.tags.anyOf(Object.values(Tag.Name));
+
+ // give higher priority to suffixes when last token was a name, else lower priority
+ if (result.tags.includes(Tag.Class.Suffix))
+ result.sort += lastTokenName ? props.priorityMod.high : props.priorityMod.low;
+
+ // give lower priority to terms matched only by their readings, and are
+ // usually written in kanji
+ if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.writing)
+ result.sort += props.priorityMod.low;
+
+ return result;
+ });
+
+ results.sort((a, b) => {
+ // sort by original string length (long to short)
+ if (a.source.length != b.source.length) return b.source.length - a.source.length;
+ // then by sort index (high to low)
+ if (a.sort != b.sort) return b.sort - a.sort;
+ // then by depth (high to low)
+ if (a.depth != b.depth) return b.depth - a.depth;
+ // else keep current order (random)
+ return 0;
+ });
+
+ // pick top result
+ const result = results[0];
+
+ parseResult.words.push(result);
+ start += result.source.length - 1; // -1 because loop already increments start
+ continue; // extra verbose end of iteration
+ }
+ return parseResult;
+ }
+};
+
diff --git a/language/tags.ts b/search/tags.ts
index 72840fe..92279c5 100644
--- a/language/tags.ts
+++ b/search/tags.ts
@@ -200,6 +200,10 @@ export function parseTags(input: string) {
if (tag == Tag.Inflection.Desirable.Volitional &&
filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue;
+ // skip conditional 〜ば if used for obligatory inflection
+ if (tag == Tag.Inflection.Conditional.Ba &&
+ filteredTags.anyOf([Tag.Inflection.Obligatory])) continue;
+
// normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative
if (tag == Tag.Inflection.Negative) {
negationCount++;
diff --git a/search/types.ts b/search/types.ts
new file mode 100644
index 0000000..d90afd6
--- /dev/null
+++ b/search/types.ts
@@ -0,0 +1,60 @@
+import { TokenTags } from "./tags.ts";
+
+export interface SearchGlossaryDefinition {
+
+};
+
+export interface SearchGlossary {
+ id: number;
+ definitions: SearchGlossaryDefinition[];
+};
+
+export interface SearchTermResult {
+ /** @property dictionary term id */
+ id: number;
+ /** @property (preferably) kanji writing of term */
+ writing: string;
+ /** @property kana-only reading of term */
+ reading: string;
+ /** @property word tags including deconjugation tags */
+ tags: TokenTags;
+ /** @property original conjugated string */
+ source: string;
+ /** @property numeric sorting value for term */
+ sort: number;
+ /** @property amount of steps that were needed to deconjugate */
+ depth: number;
+ /** @property matching results */
+ match: {
+ /** @property term matched by writing */
+ writing: boolean;
+ /** @property term matched by reading */
+ reading: boolean;
+ }
+};
+
+export interface SearchWord extends SearchTermResult {
+ /** @property starting index of word in sentence */
+ start: number;
+};
+
+export interface SearchSentenceResult {
+ words: SearchWord[];
+ input: string;
+};
+
+/** @summary options for Search.sentence() */
+export interface SearchSentenceProps {
+ /** @prop max amount of characters to look ahead when attempting to deconjugate words */
+ lookahead: number;
+ /** @prop search bias values */
+ priorityMod: {
+ /** @prop offset for negative bias */
+ low: number;
+ /** @prop offset for positive bias */
+ high: number;
+ };
+ /** @prop list of breaks treated as delimiter */
+ breaks: Array<number>;
+};
+
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index 08517d4..c29bdf1 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -1,11 +1,11 @@
-import { TokenTags, Tag } from "../../language/tags.ts";
+import { TokenTags, Tag } from "../../search/tags.ts";
const { Inflection } = Tag;
interface Test {
input: string;
mustHave: TokenTags;
mustNotHave: TokenTags;
- forceID?: number;
+ force?: { reading: string, writing: string };
};
export default [
@@ -32,7 +32,7 @@ export default [
{ input: "取るな", mustHave: [ Inflection.Negative, Inflection.Command ], mustNotHave: [], },
// other tests
{ input: "取ったり", mustHave: [ Inflection.Suffix.Tari ], mustNotHave: [ Inflection.Tense.Past ], },
- { input: "早く", mustHave: [ Inflection.Adverb ], mustNotHave: [], },
+ { input: "早く", force: { reading: "はやい", writing: "早い" }, mustHave: [ Inflection.Adverb ], mustNotHave: [], },
{ input: "遊んだり", mustHave: [ Inflection.Suffix.Tari ], mustNotHave: [ Inflection.Tense.Past ], },
{ input: "聞け", mustHave: [ Inflection.Command ], mustNotHave: [], },
{ input: "食べさせる", mustHave: [ Inflection.Causative ], mustNotHave: [], },
@@ -51,13 +51,12 @@ export default [
{ input: "しなくてはいけなかった", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
{ input: "行かないとだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
{ input: "しないといけない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
- { input: "行かなければいけません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
- { input: "しなければだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "行かなければいけません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Conditional.Ba ], },
+ { input: "しなければだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative, Inflection.Conditional.Ba ], },
{ input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
{ input: "買いたい", mustHave: [ Inflection.Desirable.Itai ], mustNotHave: [], },
{ input: "寝よう", mustHave: [ Inflection.Desirable.Volitional ], mustNotHave: [], },
- // TODO: for this test to work, a parseSentencePart function needs to be made that returns all possible words (currently clipped)
- // { input: "しましょう", forceID: 17327, mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "しましょう", force: { reading: "する", writing: "為る" }, mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
{ input: "きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
{ input: "寝ましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
{ input: "行きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
@@ -76,9 +75,10 @@ export default [
{ input: "聞きなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
{ input: "座りなさい", mustHave: [ Inflection.Polite.Nasai ], mustNotHave: [], },
{ input: "食べさせられる", mustHave: [ Inflection.Passive, Inflection.Causative ], mustNotHave: [], },
+ { input: "見極めなければならない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Conditional.Ba, Inflection.Negative ] }
// TODO: りゃ for いることは
// TODO: じゃ for では
// TODO: なきゃ + なくちゃ
// and more!
-] as Test[];
+] satisfies Test[] as Test[];
diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts
index 017a5c7..1d2f172 100644
--- a/test/deinflection/test.ts
+++ b/test/deinflection/test.ts
@@ -1,17 +1,18 @@
import cases from "./cases.ts";
import { core } from '../base.ts';
-import { TokenTag } from '../../language/tags.ts';
+import { TokenTag } from "../../search/tags.ts";
-cases.forEach(({ input, mustHave, mustNotHave, forceID }) => {
+cases.forEach(({ input, mustHave, mustNotHave, force }) => {
Deno.test(`deinflection - ${input}`, async () => {
- var { tokens } = await core.parseSentence(input);
+ var terms = await core.search.terms(input);
- if (tokens.length == 0)
- throw new Error("No parsed tokens for input");
+ if (terms.length == 0)
+ throw new Error("No parsed terms for input");
- // console.log(tokens);
- var result = tokens.find(t => t.source == input);
- if (forceID) result = tokens.find(t => t.term_id == forceID);
+ // console.log(terms);
+ var result = terms.find(t => t.source == input);
+ if (force)
+ result = terms.find(t => t.reading == force.reading && t.writing == force.writing);
if (!result)
throw new Error("No deconjugation found for input");
diff --git a/util/string.ts b/util/string.ts
index b362f06..4704d03 100644
--- a/util/string.ts
+++ b/util/string.ts
@@ -1,4 +1,4 @@
-import { TokenTags, parseTags } from "../language/tags.ts";
+import { TokenTags, parseTags } from "../search/tags.ts";
import { Wrapper } from "./wrap.ts";
declare global {