broken furigana adder :(

author: lonkaars <loek@pipeframe.xyz> 2023-07-02 20:27:46 +0200
committer: lonkaars <loek@pipeframe.xyz> 2023-07-02 20:27:46 +0200
commit: fa68eb21bf1d0460303cae3a0233e2d6cefd00ca (patch)
tree: 2a7026d2ed3710f6b455fd867f8258452100136e
parent: f4963b89ee542592e9ae95ca29d74ddc57841c3f (diff)
8 files changed, 82 insertions, 48 deletions
diff --git a/api/japanese.ts b/api/japanese.ts
index 0396821..9319cdd 100644
--- a/api/japanese.ts
+++ b/api/japanese.ts
@@ -4,6 +4,26 @@ import "../util/string.ts";
 import "../util/japanese.ts";
 import "../util/array.ts";
 
+/**
+ * Build a formatter that concatenates tokens: tokens with `ruby` set are
+ * rendered by `annotate`, all others are copied as written.
+ */
+const joinTokens = (annotate: (token: JapaneseToken) => string) =>
+	(tokens: Array<JapaneseToken>): string => tokens.reduce(
+		(text, token) => text + (token.ruby ? annotate(token) : token.writing), "");
+
+/** Furigana output styles, keyed by format name */
+const formatters = {
+	// ruby markup; writing and reading are both passed through escape()
+	"HTML": joinTokens(t => `<ruby>${escape(t.writing)}<rt>${escape(t.reading)}</rt></ruby>`),
+	// note the trailing space after the closing parenthesis
+	"parenthesis": joinTokens(t => `${t.writing}(${t.reading}) `),
+	"refold-tools": joinTokens(t => `[${t.writing}](${t.reading})`),
+} satisfies { [name: string]: (tokens: Array<JapaneseToken>) => string };
+
+/** Name of a furigana output style */
+export type JapaneseFormatter = keyof typeof formatters;
+
 /** @interface Piece */
 interface JapaneseToken {
 	/** @prop token writing (kanji/katakana/hiragana) */
@@ -22,24 +42,6 @@ export default class Japanese {
 	public reading: string;
 	private normalized: string;
 
-	private formatters = {
-		"HTML": tokens => tokens.reduce((out, token) => {
-			if (token.ruby) out += `<ruby>${escape(token.writing)}<rt>${escape(token.reading)}</rt></ruby>`;
-			else out += token.writing;
-			return out;
-		}, ""),
-		"parenthesis": tokens => tokens.reduce((out, token) => {
-			if (token.ruby) out += `${token.writing}(${token.reading}) `;
-			else out += token.writing;
-			return out;
-		}, ""),
-		"refold-tools": tokens => tokens.reduce((out, token) => {
-			if (token.ruby) out += `[${token.writing}](${token.reading})`;
-			else out += token.writing;
-			return out;
-		}, ""),
-	} satisfies Record<string, (tokens: Array<JapaneseToken>) => string>;
-
 	constructor(writing: string, reading: string) {
 		this.writing = writing;
 		this.reading = reading;
@@ -47,8 +49,8 @@ export default class Japanese {
 	}
 
 	/** @summary format this as text with furigana */
-	public furigana(format: keyof typeof this.formatters = "HTML"): string {
-		return this.formatters[format](this.tokenize());
+	public furigana(format: JapaneseFormatter = "HTML"): string {
+		return formatters[format](this.tokenize()); // look up the formatter by name and apply it to tokenize()'s result
 	}
 
 	/**
@@ -151,3 +153,6 @@ export default class Japanese {
 	}
 }
 
+// formatter names; `formatters` is a module-level constant, not a member of Japanese
+export type test = keyof typeof formatters;
+var gert: test = "HTML";
diff --git a/api/sentence-word.ts b/api/sentence-word.ts
deleted file mode 100644
index 6b2ef22..0000000
--- a/api/sentence-word.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-import { ParseToken } from "../language/types.ts";
-import Word from "./word.ts";
-
-export default class SentenceWord extends Word {
-  public indexStart: number = 0;
-  public indexEnd: number = 0;
-
-  constructor(source: string | ParseToken) {
-    super();
-  }
-}
diff --git a/api/sentence.ts b/api/sentence.ts
index 276a6c5..6d9fc6d 100644
--- a/api/sentence.ts
+++ b/api/sentence.ts
@@ -1,10 +1,10 @@
 import { ParseResult } from "../language/types.ts";
 import APIBase from "./base.ts";
-import SentenceWord from "./sentence-word.ts";
+import Japanese, { JapaneseFormatter } from "./japanese.ts";
 import Word from "./word.ts";
 
 export default class Sentence extends APIBase {
-  public words: Array<SentenceWord> = [];
+  public words: Array<Word> = [];
 	protected query?: ParseResult;
 	protected original: string = "";
 
@@ -33,7 +33,7 @@ export default class Sentence extends APIBase {
 		let token = 0;
 		let i = 0;
 		while (i < this.original.length) {
-			this.words.push(new SentenceWord(this.query!.tokens[token]).withParent(await this.api));
+			this.words.push(new Word(this.query!.tokens[token]).withParent(await this.api));
 
 			i += this.query!.tokens[token].source.length;
 			if (i == this.original.length) break;
@@ -43,8 +43,14 @@ export default class Sentence extends APIBase {
 			if (this.query!.tokens[token]?.start == i) continue;
 			var remainder = this.original.substring(i, this.query!.tokens[token]?.start);
 
-			this.words.push(new SentenceWord(remainder).withParent(await this.api));
+			this.words.push(new Word(remainder).withParent(await this.api));
 			i += remainder.length;
 		}
 	}
+
+	/** @summary format the whole sentence as text with furigana */
+	furigana(format: JapaneseFormatter = "HTML"): string {
+		const parts = this.words.map(word => word.text.furigana(format));
+		return parts.join("");
+	}
 }
diff --git a/api/word.ts b/api/word.ts
index 7eba936..3421f9f 100644
--- a/api/word.ts
+++ b/api/word.ts
@@ -1,14 +1,31 @@
 import Glossary from "./glossary.ts";
 import APIBase from "./base.ts";
 import { ParseToken } from "../language/types.ts";
+import Japanese from "./japanese.ts";
+
+import "../util/string.ts";
 
 export default class Word extends APIBase {
-  public writing = "TODO";
-  public reading = "TODO";
-	public conjugated = "TODO";
+	/** @prop dictionary form of verb if this word is a verb */
+	public base: Japanese;
+	/** @prop word as written in parent sentence */
+	public text: Japanese;
 
-  constructor() {
-    super();
+  /** @param input  plain text, or a parse token supplying writing, reading and source */
+  constructor(input: string | ParseToken) {
+		super();
+		if (typeof input !== "string") {
+			const { writing, reading, source } = input;
+			// how much of the source agrees with the start of the writing or reading
+			const stem = Math.max(writing.cmpLen(source), reading.cmpLen(source));
+			const ending = source.substring(stem);
+			this.base = new Japanese(writing, reading);
+			this.text = new Japanese(writing.substring(0, stem) + ending,
+				reading.substring(0, stem) + ending);
+		} else {
+			this.text = new Japanese(input, input);
+			this.base = this.text;
+		}
   }
 
   async glossary() {
diff --git a/examples/furigana-html.ts b/examples/furigana-html.ts
index 5f59b4f..f0ff067 100644
--- a/examples/furigana-html.ts
+++ b/examples/furigana-html.ts
@@ -13,7 +13,7 @@ var api = new Yomikun(new DirectCoreClient());
 var sentence = await api.sentence("日本に来て一番驚いたことは自動販売機の多さだ。");
 
 // Copy the sentence verbatim but add furigana to each word's kanji
-var furigana = sentence.furigana({ format: "HTML" });
+var furigana = sentence.furigana("HTML");
 
 console.log(furigana);
 
diff --git a/language/parser.ts b/language/parser.ts
index bc80637..bb4ac1e 100644
--- a/language/parser.ts
+++ b/language/parser.ts
@@ -116,7 +116,8 @@ export default class Parser {
 			const result = results[0];
 
 			parseResult.tokens.push({
-				reading: [ {"text": result.expression, "ruby": result.reading} ], // TODO: source to reading + separate kaji/kana
+				writing: result.expression,
+				reading: result.reading,
 				tags: result.tags,
 				term_id: result.id,
 				source: result.original,
diff --git a/language/types.ts b/language/types.ts
index 1aeb9da..d3585f8 100644
--- a/language/types.ts
+++ b/language/types.ts
@@ -5,11 +5,6 @@ export enum ParseDepth {
 	Glossary,
 };
 
-export interface TokenReading {
-	text: string;
-	ruby?: string;
-};
-
 export interface GlossaryDefinition {
 	
 };
@@ -20,7 +15,8 @@ export interface Glossary {
 };
 
 export interface ParseToken {
-	reading: TokenReading[];
+	writing: string;
+	reading: string;
 	tags: TokenTags;
 	glossary?: Glossary;
 	term_id: number;
diff --git a/util/string.ts b/util/string.ts
index 327b884..934963e 100644
--- a/util/string.ts
+++ b/util/string.ts
@@ -32,6 +32,17 @@ declare global {
 		 * `mapFn`
 		 */
 		map(mapFn: (char: string) => string): string;
+
+		/**
+		 * @summary return length of the match of searchString from position (default: 0)
+		 *
+		 * Similar to String.prototype.startsWith, but returns the length of the
+		 * match instead of a boolean true or false.
+		 *
+		 * @param searchString  string to search for
+		 * @param position  index to search from (0 by default = start of string)
+		 */
+		cmpLen(searchString: string, position?: number): number;
 	}
 }
 
@@ -113,3 +124,12 @@ String.prototype.map = function(mapFn) {
 	return out;
 }
 
+/** Length of the leading part of `searchString` found in this string at `position`. */
+String.prototype.cmpLen = function(searchString, position = 0) {
+	let len = 0;
+	// only the common prefix counts: stop at the end of either string or at
+	// the first character that differs
+	while (len < searchString.length && len + position < this.length
+		&& this[len + position] === searchString[len]) len++;
+	return len;
+}
author	lonkaars <loek@pipeframe.xyz>	2023-07-02 20:27:46 +0200
committer	lonkaars <loek@pipeframe.xyz>	2023-07-02 20:27:46 +0200
commit	fa68eb21bf1d0460303cae3a0233e2d6cefd00ca (patch)
tree	2a7026d2ed3710f6b455fd867f8258452100136e
parent	f4963b89ee542592e9ae95ca29d74ddc57841c3f (diff)