diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-13 16:39:01 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-13 16:39:01 +0200 |
commit | 28fb362147358819afeb0f0a5fa7a4ad136499ac (patch) | |
tree | 4c4224f503f66fcea1e25b464daf0f9659a09bab | |
parent | 919cdce6bcf12f564901ea536dae4227d025b031 (diff) |
ignore terms
-rw-r--r-- | api/base.ts | 10 | ||||
-rw-r--r-- | api/sentence.ts | 44 | ||||
-rw-r--r-- | api/word.ts | 76 | ||||
-rw-r--r-- | api/yomikun.ts | 14 | ||||
-rw-r--r-- | core/api.ts | 1 | ||||
-rw-r--r-- | core/raw/api.ts | 6 | ||||
-rw-r--r-- | db/db.ts | 22 | ||||
-rw-r--r-- | db/makefile | 2 | ||||
-rw-r--r-- | db/user/init.sql | 5 | ||||
-rw-r--r-- | examples/reading-correction-break.ts | 2 | ||||
-rw-r--r-- | examples/user-ignore.ts | 20 | ||||
-rw-r--r-- | search/search.ts | 3 | ||||
-rw-r--r-- | test/reading/cases.ts | 6 | ||||
-rw-r--r-- | util/wrap.ts | 4 |
14 files changed, 152 insertions, 63 deletions
diff --git a/api/base.ts b/api/base.ts index e89e76b..28940ef 100644 --- a/api/base.ts +++ b/api/base.ts @@ -1,18 +1,12 @@ -import Core from "../core/api.ts"; import Yomikun from "./yomikun.ts"; /** @summary generic class that keeps a reference to parent API reference */ export default abstract class APIBase { private _resolveAPI: (api: Yomikun) => void = _ => {}; - - protected api: Promise<Yomikun>; - - constructor() { - this.api = new Promise<Yomikun>(res => this._resolveAPI = res); - } + protected api = new Promise<Yomikun>(res => this._resolveAPI = res); /** @summary set API reference and return self (for use directly after constructor) */ - withParent(api: Yomikun) { + withAPI(api: Yomikun) { this._resolveAPI(api); return this; } diff --git a/api/sentence.ts b/api/sentence.ts index 6b1a1e4..2311913 100644 --- a/api/sentence.ts +++ b/api/sentence.ts @@ -10,8 +10,8 @@ export default class Sentence extends APIBase { protected breaks: Array<number> = []; protected frozen = false; - public ready: Promise<void>; private _resolveReady: () => void = () => {}; + public ready = new Promise<void>(res => this._resolveReady = res); constructor(input: string) { super(); @@ -20,7 +20,7 @@ export default class Sentence extends APIBase { } first(searchValue: RegExp | string): Word | undefined { - return this.words[0]; // TODO: implement + return this.at(this.original.search(searchValue)); } private async fetch() { @@ -28,11 +28,15 @@ export default class Sentence extends APIBase { } private async updateWords() { - this.words.clear(); + this.words = []; let token = 0; let i = 0; while (i < this.original.length) { - this.words.push(new Word(this.query!.words[token]).withParent(await this.api)); + this.words.push( + new Word(this.query!.words[token]) + .withAPI(await this.api) + .withParent(this) + ); i += this.query!.words[token].source.length; if (i == this.original.length) break; @@ -42,7 +46,11 @@ export default class Sentence extends APIBase { if (this.query!.words[token]?.start == i) continue; var remainder = this.original.substring(i, this.query!.words[token]?.start); - this.words.push(new Word(remainder).withParent(await this.api)); + this.words.push( + new Word(remainder) + .withAPI(await this.api) + .withParent(this) + ); i += remainder.length; } } @@ -53,8 +61,7 @@ export default class Sentence extends APIBase { }, ""); } - public async update() { - if (this.frozen) return; + public async forceUpdate() { // unresolve ready this.ready = new Promise(res => this._resolveReady = res); @@ -65,12 +72,29 @@ export default class Sentence extends APIBase { // mark ready again this._resolveReady(); + } + + public async update() { + if (this.frozen) return; + await this.forceUpdate(); } - public at(term: string) { - return this.original.indexOf(term); + public indexOf(searchString: string, position: number = 0) { + return this.original.indexOf(searchString, position); } + public at(indentifier: number | string): Word | undefined { + var index = typeof indentifier === "number" ? indentifier : this.indexOf(indentifier); + if (index == -1) return; + let wordIndex = 0; + for (let i = 0; wordIndex < this.words.length; wordIndex++) { + var length = this.words[wordIndex].length; + if (i + length > index) break; + i += length; + } + return this.words[wordIndex]; + } + public async break(location: number) { this.breaks.push(location); await this.update(); @@ -82,6 +106,6 @@ export default class Sentence extends APIBase { public async unfreeze() { this.frozen = false; - await this.update(); + await this.forceUpdate(); } } diff --git a/api/word.ts b/api/word.ts index 2e07c98..b519789 100644 --- a/api/word.ts +++ b/api/word.ts @@ -6,33 +6,33 @@ import "../util/object.ts"; import { Tag, TagGroup, TokenTags } from "../search/tags.ts"; import { SearchWord } from "../search/types.ts"; import { recursiveValues } from "../util/object.ts"; +import Sentence from "./sentence.ts"; + +// TODO: better irregular reading handling (should also work for counter words / 入る) // irregular stems taken from <https://en.wikipedia.org/wiki/Japanese_irregular_verbs#suru_and_kuru> -function irregularSuruStem(tags: TokenTags): string { +function irregularSuru(tags: TokenTags, conjugation: string): string { for (let i = 0, tag = tags[i]; i < tags.length; i++, tag = tags[i]) { if (!recursiveValues(Tag.Inflection).includes(tag)) continue; if (recursiveValues(Tag.Inflection.Reason).includes(tag)) continue; if ([ Tag.Inflection.Polite.Masu, Tag.Inflection.Suffix.Te, - Tag.Inflection.Tense.Past, Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form - Tag.Inflection.Negative, - Tag.Inflection.Desirable.Volitional, Tag.Inflection.Command, - ].includes(tag as any)) return "し"; + ].includes(tag as any)) return "し" + conjugation; if ([ Tag.Inflection.Passive, Tag.Inflection.Causative, - ].includes(tag as any)) return "さ"; + ].includes(tag as any)) return "さ" + conjugation; // wikipedia has できる as the potential form for する, but できる here // means it's already foobar'd break; } - return "す"; + return conjugation; } -function irregularKuruStem(tags: TokenTags): string { +function irregularKuru(tags: TokenTags, conjugation: string): string { for (let i = 0, tag = tags[i]; i < tags.length; i++, tag = tags[i]) { if (!recursiveValues(Tag.Inflection).includes(tag)) continue; if (recursiveValues(Tag.Inflection.Reason).includes(tag)) continue; @@ -41,7 +41,7 @@ function irregularKuruStem(tags: TokenTags): string { Tag.Inflection.Suffix.Te, Tag.Inflection.Tense.Past, Tag.Inflection.Desirable.Itai, // part of Wikipedia's -ta form - ].includes(tag as any)) return "き"; + ].includes(tag as any)) return "き" + conjugation; if ([ Tag.Inflection.Negative, Tag.Inflection.Desirable.Volitional, @@ -49,10 +49,10 @@ function irregularKuruStem(tags: TokenTags): string { Tag.Inflection.Causative, Tag.Inflection.Potential, Tag.Inflection.Command, - ].includes(tag as any)) return "こ"; + ].includes(tag as any)) return "こ" + conjugation; break; } - return "く"; + return "く" + conjugation; } export default class Word extends APIBase { @@ -60,11 +60,22 @@ export default class Word extends APIBase { protected base: Japanese; /** @prop word as written in parent sentence */ protected text: Japanese; - /** @prop this.furigana should output kanji with reading */ - protected outputKanji: boolean = true; /** @prop this word represents an unrecognized sentence part between recognized terms */ protected filler: boolean; + private _resolveParent: (sentence: Sentence) => void = _ => {}; + /** @prop parent sentence */ + protected parent = new Promise<Sentence>(res => this._resolveParent = res); + + /** @prop length of word in sentence */ + public length: number; + /** @prop (conjugated) writing of term (*may* contain kanji) */ + public writing: string; + /** @prop (conjugated) reading of term (kana-only) */ + public reading: string; + /** @prop dictionary id for term */ + public id: number = -1; + constructor(input: string | SearchWord) { super(); if (typeof input === "string") { @@ -72,7 +83,6 @@ export default class Word extends APIBase { input = input as string; this.base = new Japanese(input, input); this.text = this.base; - this.outputKanji = false; } else { this.filler = false; input = input as SearchWord; @@ -87,26 +97,50 @@ export default class Word extends APIBase { // special reading for irregular verbs var reading = input.reading; - if (input.writing == '来る') reading = irregularKuruStem(input.tags) + conjugation; - else if (input.writing == '為る') reading = irregularSuruStem(input.tags) + conjugation; + if (input.writing == '来る') reading = irregularKuru(input.tags, conjugation); + else if (input.writing == '為る') reading = irregularSuru(input.tags, conjugation); else reading = reading.replaceLast(base, conjugation); // generate conjugated version of verb with kanji - this.text = new Japanese(input.writing.replaceLast(base, conjugation), reading); + this.text = new Japanese(input.source, reading); } else { this.text = this.base; } - this.outputKanji = input.source.hasKanji(); // only output kanji if input also uses kanji + this.id = input.id; } + this.writing = this.text.writing; + this.length = this.text.writing.length; + this.reading = this.text.reading; } furigana(format: JapaneseFormatter) { - if (!this.outputKanji) return this.text.reading; - else return this.text.furigana(format); + return this.text.furigana(format); } async glossary() { // TODO: output nothing if this.filler == true - return new Glossary().withParent(await this.api); + return new Glossary().withAPI(await this.api); } + + /** @summary check if this word is written as ~ */ + public written(as: string) { + return this.text.writing == as || this.base.writing == as; + } + + /** @summary check if this word is read as ~ */ + public read(as: string) { + return this.text.reading == as || this.base.reading == as; + } + + /** @summary ignore this word for currently logged in user and refresh the sentence */ + async ignore() { + await (await this.api)["setTermPriority"](this.base.writing, this.base.reading, -1); + await (await this.parent).update(); + } + + /** @summary set parent sentence for this word */ + public withParent(parent: Sentence) { + this._resolveParent(parent); + return this; + } } diff --git a/api/yomikun.ts b/api/yomikun.ts index 696361f..2b102fe 100644 --- a/api/yomikun.ts +++ b/api/yomikun.ts @@ -4,6 +4,9 @@ import Sentence from "./sentence.ts"; export default class Yomikun { protected core: Core; + protected user: string = "root"; + protected uid: number = 0; + public ready: Promise<void>; constructor(core?: Core) { @@ -16,9 +19,18 @@ export default class Yomikun { } async sentence(input: string): Promise<Sentence> { - var sentence = new Sentence(input).withParent(this); + var sentence = new Sentence(input).withAPI(this); await sentence.ready; return sentence; } + + public su(user: string) { + this.user = user; + // TODO: set this.uid + } + + private async setTermPriority(expression: string, reading: string, priority: number) { + this.core.user.termPriority(this.uid, expression, reading, priority); + } } diff --git a/core/api.ts b/core/api.ts index 77195b2..6fac346 100644 --- a/core/api.ts +++ b/core/api.ts @@ -10,6 +10,7 @@ export interface CoreSearch { /** @interface user management */ export interface CoreUser { + termPriority(userID: number, expression: string, reading: string, priority: number): Promise<void>; // TODO: list // TODO: add // TODO: remove diff --git a/core/raw/api.ts b/core/raw/api.ts index 76cdb5b..47e9075 100644 --- a/core/raw/api.ts +++ b/core/raw/api.ts @@ -33,7 +33,11 @@ export default class RawCore implements Core { }, }; - public user: CoreUser = {}; + public user: CoreUser = { + termPriority: async (uid, expression, reading, priority) => { + this._db.termPriority(uid, expression, reading, priority); + }, + }; public import: CoreImport = {}; @@ -47,7 +47,7 @@ interface DBFindResult { * const results = db.findTerm("なった"); */ export default class DB { - private connection: Database; + private connection: Database = new Database(":memory:", { create: false }); public ready: Promise<void>; private here = path.dirname(path.fromFileUrl(import.meta.url)); private paths = { @@ -59,22 +59,19 @@ export default class DB { find: path.resolve(this.here, 'find.sql'), }, } as const; - private statement: { - attach: Statement; - queryTerm: Statement; - }; + private statement = { + attach: this.connection.prepare("attach database ? as ?"), + queryTerm: this.connection.prepare(""), + termPriority: this.connection.prepare(""), + } satisfies { [name: string]: Statement }; constructor() { - this.connection = new Database(":memory:", { create: false }); - this.statement = { - attach: this.connection.prepare("attach database ? as ?"), - queryTerm: this.connection.prepare(""), - }; this.attach(this.paths.db.dict, 'dict'); this.attach(this.paths.db.user, 'user'); this.ready = new Promise<void>(async resolve => { const statement = await Deno.readTextFile(this.paths.query.find); this.statement.queryTerm = this.connection.prepare(statement); + this.statement.termPriority = this.connection.prepare("insert into sort_overlay (user_id, expression, reading, sort) values (?, ?, ?, ?)"); resolve(); }); } @@ -104,4 +101,9 @@ export default class DB { }); return terms; } + + async termPriority(userID: number, expression: string, reading: string, priority: number) { + await this.ready; + this.statement.termPriority.run(userID, expression, reading, priority); + } }; diff --git a/db/makefile b/db/makefile index 88d4bba..3968ae7 100644 --- a/db/makefile +++ b/db/makefile @@ -32,7 +32,7 @@ dict/dict.sql: $(DEFAULT_DICTS) user/base.sql: user/reset.sql user/init.sql cat $^ > $@ -user/full.sql: user/base.sql user/root.sql +user/full.sql: user/base.sql # user/root.sql cat $^ > $@ %.sql: %.dict.sql $(DICT_TEMPLATE) diff --git a/db/user/init.sql b/db/user/init.sql index 1d0e830..af60fa3 100644 --- a/db/user/init.sql +++ b/db/user/init.sql @@ -4,6 +4,8 @@ create table if not exists user ( unique(username) ); +insert into user (id, username) values (0, 'root'); + create table if not exists sort_overlay ( id integer primary key autoincrement, user_id int not null default 0, @@ -14,3 +16,6 @@ create table if not exists sort_overlay ( unique(user_id, expression, reading) on conflict replace ); +create index sort_overlay_expression on sort_overlay (expression); +create index sort_overlay_reading on sort_overlay (reading); + diff --git a/examples/reading-correction-break.ts b/examples/reading-correction-break.ts index a72e545..3c45d27 100644 --- a/examples/reading-correction-break.ts +++ b/examples/reading-correction-break.ts @@ -14,7 +14,7 @@ var sentence = await api.sentence("やっぱりこの辺にある武器も朽ち console.log(sentence.furigana()); // insert parser break in the middle of a (wrong) expression -await sentence.break(sentence.at("この辺") + 2); +await sentence.break(sentence.indexOf("この辺") + 2); // generated reading (correct) console.log(sentence.furigana()); diff --git a/examples/user-ignore.ts b/examples/user-ignore.ts index d0abbe3..f15baed 100644 --- a/examples/user-ignore.ts +++ b/examples/user-ignore.ts @@ -13,24 +13,26 @@ api.su("gert"); // Lookup sentence var sentence = await api.sentence("浮上したハイラル城の下にてゼルダ様達の捜索を行うこととなった"); +// Get reading for sentence +console.log(sentence.furigana()); + // Freeze disables automatic updating of words after database mutations. It's // used here because there are multiple sequential updates to the database. -sentence.freeze(); +await sentence.freeze(); // Ignore some expressions in JMdict (ignore applies to current user = gert in -// this case) -sentence.words.find(w => w.written("達") && w.read("だち"))?.ignore(); // wrong reading for this case -sentence.at("の下に").ignore(); // expression のもとに -sentence.at("下に").ignore(); // expression したに +// this case) (TODO: this only works on a fresh database) +await sentence.words.find(w => w.written("達") && w.read("だち"))?.ignore(); // wrong reading for this case +await sentence.at("の下に")?.ignore(); // expression のもとに +await sentence.forceUpdate(); // manual update to get last wrong term +await sentence.at("下に")?.ignore(); // expression したに // TODO: 達(だち) should not have to be ignored, but scored lower following // rendaku rules. <https://en.wikipedia.org/wiki/Rendaku> // Unfreeze allows updates again and implicitly calls .update() -sentence.unfreeze(); +await sentence.unfreeze(); // Get new reading for sentence -var furigana = sentence.furigana(); - -console.log(furigana); +console.log(sentence.furigana()); diff --git a/search/search.ts b/search/search.ts index 57bd0e2..89c8289 100644 --- a/search/search.ts +++ b/search/search.ts @@ -36,6 +36,7 @@ export default class Search { // ignore ignored by user terms if (result.sort < 0) return false; + // deconjugated words if (result.depth > 0) { // check if this word can be conjugated at all @@ -47,7 +48,7 @@ export default class Search { if (result.tags.includes(Tag.Class.Verb.Ru) && !result.tags.includes(Tag.Inflection.Reason.Ru)) return false; if (result.tags.includes(Tag.Class.Verb.Suru) && - !result.tags.includes(Tag.Inflection.Reason.Suru)) return false; + !result.tags.anyOf([ Tag.Inflection.Reason.Suru, Tag.Class.Verb.SuruIncluded ])) return false; if (result.tags.includes(Tag.Class.Adjective.I) && !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false; if (result.tags.includes(Tag.Class.Adjective.Na) && diff --git a/test/reading/cases.ts b/test/reading/cases.ts index e6b0787..19221a9 100644 --- a/test/reading/cases.ts +++ b/test/reading/cases.ts @@ -9470,4 +9470,10 @@ export default [ output: "ええ~ デート?\n[違](ちが)わい!", tags: [ "お兄ちゃんはおしまい!" ] }, + { // https://youtu.be/ngmhaKdVOeY?t=30 + input: "ぜひ目次を見ながら欲しいものを探していってもらえると嬉しいです", + reading: "ぜひもくじをみながらほしいものをさがしていってもらえるとうれしいです", + output: "ぜひ[目次](もくじ)を[見](み)ながら[欲](ほ)しいものを[探](さが)していってもらえると[嬉](うれ)しいです", + tags: [ "YouTube", "トバログ" ] + }, ] satisfies Test[]; diff --git a/util/wrap.ts b/util/wrap.ts index 761a1f0..d616a23 100644 --- a/util/wrap.ts +++ b/util/wrap.ts @@ -32,6 +32,10 @@ export const Wrap = { ruby: WrapTag("ruby"), /** @prop \<rt>input\</rt> */ rubyText: WrapTag("rt"), + /** @prop \<p>input\</p> */ + paragraph: WrapTag("p"), + /** @prop \<pre>input\</pre> */ + preformatted: WrapTag("pre"), }, /** @prop \*input\* */ asterisk: WrapWith("*"), |