From 8e179a43e909ce4683f753a90bb3505630f05ad8 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Sat, 15 Jul 2023 21:52:57 +0200 Subject: implement alternate writings (failing tests down to 500) --- search/readme.md | 16 ++++++++-------- search/search.ts | 2 +- search/tags.ts | 20 ++++++++------------ 3 files changed, 17 insertions(+), 21 deletions(-) (limited to 'search') diff --git a/search/readme.md b/search/readme.md index 400c8ce..164dc9f 100644 --- a/search/readme.md +++ b/search/readme.md @@ -23,11 +23,11 @@ to Yomikun's tags for compatibility. Other tags include: ### Behavior-altering tags -Some tag classes impact the parser's behavior. For example, the input text +A word's class can impact the parser's behavior. For example, the input text 「完了しました」 will be parsed as just 「完了」, but with the -`class:verb:suru-included` tag added by the parser. This is because the word +inflection `infl:suru` tag added by the parser. This is because the word 「完了」 has the tag `class:verb:suru` in the database, which allows the parser -to deconjugate a noun with the verb 「する」 back into the stem. +to deconjugate a noun with the verb 「する」 attached back into the stem. Other uses of this behavior include more accurate automatic kanji reading generation, for example 「城」 being read as 「じょう」 in 「ハイラル城」 @@ -35,11 +35,11 @@ because 「ハイラル」 has the tag `name:place` in the database, and 「城(じょう)」 has `class:suffix`, while 「城(しろ)」 has `class:noun`. Yomikun encourages homebrew dictionary sharing, and encourages using -behavior-altering tags for fixing readings for cases like the above examples. -As another example of this, it is encouraged that a dictionary for (for -example) Zelda add 「トト」 as a term with tags `class:noun` and `name:place`, -instead of 「トト湖(こ)」 as an expression to fix the reading of the kanji -「湖(みずうみ)」. +behavior-altering tags instead of expressions for fixing readings for cases +like the above examples. 
As another example of this, it is encouraged that a +dictionary for, say, Zelda add 「トト」 as a term with tags +`class:noun` and `name:place`, instead of 「トト湖(こ)」 as an expression to +fix the reading of the kanji 「湖(みずうみ)」. If Yomikun doesn't generate the correct reading, and the reading isn't based on natural language context (=a computer *could* accurately decide which reading diff --git a/search/search.ts b/search/search.ts index 89c8289..81ca937 100644 --- a/search/search.ts +++ b/search/search.ts @@ -48,7 +48,7 @@ export default class Search { if (result.tags.includes(Tag.Class.Verb.Ru) && !result.tags.includes(Tag.Inflection.Reason.Ru)) return false; if (result.tags.includes(Tag.Class.Verb.Suru) && - !result.tags.anyOf([ Tag.Inflection.Reason.Suru, Tag.Class.Verb.SuruIncluded ])) return false; + !result.tags.anyOf([ Tag.Inflection.Reason.Suru, Tag.Inflection.Suru ])) return false; if (result.tags.includes(Tag.Class.Adjective.I) && !result.tags.includes(Tag.Inflection.Reason.Adjective.I)) return false; if (result.tags.includes(Tag.Class.Adjective.Na) && diff --git a/search/tags.ts b/search/tags.ts index 32ce02f..0f21eea 100644 --- a/search/tags.ts +++ b/search/tags.ts @@ -6,22 +6,12 @@ export const Tag = { Class: { /** @constant verb subgroup */ Verb: { - /** @constant noun that can be conjugated into a verb by adding する and する itself */ - Suru: "class:verb:suru", - /** - * @constant verb stored as conjugated noun in database (nominal verb) - * - * @deprecated The use of conjugated forms in dictionaries is discouraged. - * - * This tag is added by the deconjugation code to check for a legal - * deconjugation if する has been deconjugated away for a word marked - * suru-verb. 
- */ - SuruIncluded: "class:verb:suru-included", /** @constant 〜う verbs in [taekim] (godan) */ U: "class:verb:u", /** @constant 〜る verbs in [taekim] (ichidan) */ Ru: "class:verb:ru", + /** @constant noun that can be conjugated into a verb by adding する and する itself */ + Suru: "class:verb:suru", /** @constant kuru (来る) */ Kuru: "class:verb:kuru", }, @@ -47,6 +37,10 @@ export const Tag = { Expression: "class:expr", /** @constant adverbs (e.g. 早く) */ Adverb: "class:adverb", + Special: { // TODO: remove or start using instead of hardcoding する and くる + Suru: "class:special:suru", + Kuru: "class:special:kuru", + } }, /** @constant types of names */ Name: { @@ -148,6 +142,8 @@ export const Tag = { /** @constant 〜とする attempts (e.g. 入ろうとしている) */ ToSuru: "infl:attempt:tosuru", }, + /** @constant suru verbs conjugated with 〜する (e.g. 説明する) */ + Suru: "infl:suru", }, /** @constant uncategorized tags */ Auxiliary: { -- cgit v1.2.3