diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-02 20:27:46 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-02 20:27:46 +0200 |
commit | fa68eb21bf1d0460303cae3a0233e2d6cefd00ca (patch) | |
tree | 2a7026d2ed3710f6b455fd867f8258452100136e | |
parent | f4963b89ee542592e9ae95ca29d74ddc57841c3f (diff) |
broken furigana adder :(
-rw-r--r-- | api/japanese.ts | 45 | ||||
-rw-r--r-- | api/sentence-word.ts | 11 | ||||
-rw-r--r-- | api/sentence.ts | 14 | ||||
-rw-r--r-- | api/word.ts | 27 | ||||
-rw-r--r-- | examples/furigana-html.ts | 2 | ||||
-rw-r--r-- | language/parser.ts | 3 | ||||
-rw-r--r-- | language/types.ts | 8 | ||||
-rw-r--r-- | util/string.ts | 20 |
8 files changed, 82 insertions, 48 deletions
diff --git a/api/japanese.ts b/api/japanese.ts index 0396821..9319cdd 100644 --- a/api/japanese.ts +++ b/api/japanese.ts @@ -4,6 +4,26 @@ import "../util/string.ts"; import "../util/japanese.ts"; import "../util/array.ts"; +const formatters = { + "HTML": tokens => tokens.reduce((out, token) => { + if (token.ruby) out += `<ruby>${escape(token.writing)}<rt>${escape(token.reading)}</rt></ruby>`; + else out += token.writing; + return out; + }, ""), + "parenthesis": tokens => tokens.reduce((out, token) => { + if (token.ruby) out += `${token.writing}(${token.reading}) `; + else out += token.writing; + return out; + }, ""), + "refold-tools": tokens => tokens.reduce((out, token) => { + if (token.ruby) out += `[${token.writing}](${token.reading})`; + else out += token.writing; + return out; + }, ""), +} satisfies { [name: string]: (tokens: Array<JapaneseToken>) => string }; + +export type JapaneseFormatter = keyof typeof formatters; + /** @interface Piece */ interface JapaneseToken { /** @prop token writing (kanji/katakana/hiragana) */ @@ -22,24 +42,6 @@ export default class Japanese { public reading: string; private normalized: string; - private formatters = { - "HTML": tokens => tokens.reduce((out, token) => { - if (token.ruby) out += `<ruby>${escape(token.writing)}<rt>${escape(token.reading)}</rt></ruby>`; - else out += token.writing; - return out; - }, ""), - "parenthesis": tokens => tokens.reduce((out, token) => { - if (token.ruby) out += `${token.writing}(${token.reading}) `; - else out += token.writing; - return out; - }, ""), - "refold-tools": tokens => tokens.reduce((out, token) => { - if (token.ruby) out += `[${token.writing}](${token.reading})`; - else out += token.writing; - return out; - }, ""), - } satisfies Record<string, (tokens: Array<JapaneseToken>) => string>; - constructor(writing: string, reading: string) { this.writing = writing; this.reading = reading; @@ -47,8 +49,8 @@ export default class Japanese { } /** @summary format this as text with furigana */ - public furigana(format: keyof typeof this.formatters = "HTML"): string { - return this.formatters[format](this.tokenize()); + public furigana(format: JapaneseFormatter = "HTML"): string { + return formatters[format](this.tokenize()); } /** @@ -151,3 +153,6 @@ export default class Japanese { } } +export type test = keyof typeof Japanese.formatters; + +var gert: test = "HTML"; diff --git a/api/sentence-word.ts b/api/sentence-word.ts deleted file mode 100644 index 6b2ef22..0000000 --- a/api/sentence-word.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { ParseToken } from "../language/types.ts"; -import Word from "./word.ts"; - -export default class SentenceWord extends Word { - public indexStart: number = 0; - public indexEnd: number = 0; - - constructor(source: string | ParseToken) { - super(); - } -} diff --git a/api/sentence.ts b/api/sentence.ts index 276a6c5..6d9fc6d 100644 --- a/api/sentence.ts +++ b/api/sentence.ts @@ -1,10 +1,10 @@ import { ParseResult } from "../language/types.ts"; import APIBase from "./base.ts"; -import SentenceWord from "./sentence-word.ts"; +import Japanese, { JapaneseFormatter } from "./japanese.ts"; import Word from "./word.ts"; export default class Sentence extends APIBase { - public words: Array<SentenceWord> = []; + public words: Array<Word> = []; protected query?: ParseResult; protected original: string = ""; @@ -33,7 +33,7 @@ export default class Sentence extends APIBase { let token = 0; let i = 0; while (i < this.original.length) { - this.words.push(new SentenceWord(this.query!.tokens[token]).withParent(await this.api)); + this.words.push(new Word(this.query!.tokens[token]).withParent(await this.api)); i += this.query!.tokens[token].source.length; if (i == this.original.length) break; @@ -43,8 +43,14 @@ export default class Sentence extends APIBase { if (this.query!.tokens[token]?.start == i) continue; var remainder = this.original.substring(i, this.query!.tokens[token]?.start); - this.words.push(new SentenceWord(remainder).withParent(await this.api)); + this.words.push(new Word(remainder).withParent(await this.api)); i += remainder.length; } } + + furigana(format: JapaneseFormatter = "HTML"): string { + return this.words.reduce((out, word) => { + return out + word.text.furigana(format); + }, ""); + } } diff --git a/api/word.ts b/api/word.ts index 7eba936..3421f9f 100644 --- a/api/word.ts +++ b/api/word.ts @@ -1,14 +1,31 @@ import Glossary from "./glossary.ts"; import APIBase from "./base.ts"; import { ParseToken } from "../language/types.ts"; +import Japanese from "./japanese.ts"; + +import "../util/string.ts"; export default class Word extends APIBase { - public writing = "TODO"; - public reading = "TODO"; - public conjugated = "TODO"; + /** @prop dictionary form of verb if this word is a verb */ + public base: Japanese; + /** @prop word as written in parent sentence */ + public text: Japanese; - constructor() { - super(); + constructor(input: string | ParseToken) { + super(); + if (typeof input === "string") { + input = input as string; + this.text = new Japanese(input, input); + this.base = this.text; + } else { + input = input as ParseToken; + var common = Math.max(input.writing.cmpLen(input.source), + input.reading.cmpLen(input.source)); + var conjugation = input.source.substring(common); + this.base = new Japanese(input.writing, input.reading); + this.text = new Japanese(input.writing.substring(0, common) + conjugation, + input.reading.substring(0, common) + conjugation); + } } async glossary() { diff --git a/examples/furigana-html.ts b/examples/furigana-html.ts index 5f59b4f..f0ff067 100644 --- a/examples/furigana-html.ts +++ b/examples/furigana-html.ts @@ -13,7 +13,7 @@ var api = new Yomikun(new DirectCoreClient()); var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。"); // Copy the sentence verbatim but add furigana to each word's kanji -var furigana = sentence.furigana({ format: "HTML" }); +var furigana = sentence.furigana("HTML"); console.log(furigana); diff --git a/language/parser.ts b/language/parser.ts index bc80637..bb4ac1e 100644 --- a/language/parser.ts +++ b/language/parser.ts @@ -116,7 +116,8 @@ export default class Parser { const result = results[0]; parseResult.tokens.push({ - reading: [ {"text": result.expression, "ruby": result.reading} ], // TODO: source to reading + separate kaji/kana + writing: result.expression, + reading: result.reading, tags: result.tags, term_id: result.id, source: result.original, diff --git a/language/types.ts b/language/types.ts index 1aeb9da..d3585f8 100644 --- a/language/types.ts +++ b/language/types.ts @@ -5,11 +5,6 @@ export enum ParseDepth { Glossary, }; -export interface TokenReading { - text: string; - ruby?: string; -}; - export interface GlossaryDefinition { }; @@ -20,7 +15,8 @@ export interface Glossary { }; export interface ParseToken { - reading: TokenReading[]; + writing: string; + reading: string; tags: TokenTags; glossary?: Glossary; term_id: number; diff --git a/util/string.ts b/util/string.ts index 327b884..934963e 100644 --- a/util/string.ts +++ b/util/string.ts @@ -32,6 +32,17 @@ declare global { * `mapFn` */ map(mapFn: (char: string) => string): string; + + /** + * @summary return length of the match of searchValue from startIndex (default: 0) + * + * Similar to String.prototype.startsWith, but returns the length of the + * match instead of a boolean true or false. + * + * @param searchString string to search for + * @param position index to search from (0 by default = start of string) + */ + cmpLen(searchString: string, position?: number): number; } } @@ -113,3 +124,12 @@ String.prototype.map = function(mapFn) { return out; } +String.prototype.cmpLen = function(searchString, position = 0) { + let len = 0; + for (let i = 0; i < searchString.length; i++) { + if (i + position >= this.length) break; + if (this[i + position] == searchString[i]) len++; + } + return len; +} + |