From 92bc1ed78859984486336d95641ddbdca8d02841 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Fri, 7 Jul 2023 00:50:25 +0200 Subject: small updates --- language/parser.ts | 6 +++--- language/readme.md | 4 ++-- language/tags.ts | 23 ++++++++++++++--------- readme.md | 9 ++++++--- test/deinflection/cases.ts | 8 ++++++-- test/reading/cases.ts | 10 +++++++++- 6 files changed, 40 insertions(+), 20 deletions(-) diff --git a/language/parser.ts b/language/parser.ts index 6398595..7fd3981 100644 --- a/language/parser.ts +++ b/language/parser.ts @@ -30,7 +30,7 @@ export default class Parser { depth: optional?.depth ?? ParseDepth.Term, priorityMod: { high: optional?.priorityMod?.high ?? 10, - low: optional?.priorityMod?.low ?? 0.1, + low: optional?.priorityMod?.low ?? -10, }, breaks: optional?.breaks ?? [], } @@ -95,12 +95,12 @@ export default class Parser { // give higher priority to suffixes when last token was a name, else lower priority if (result.tags.includes(Tag.Class.Suffix)) - result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low; + result.sort += lastTokenName ? options.priorityMod.high : options.priorityMod.low; // give lower priority to terms matched only by their readings, and are // usually written in kanji if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji) - result.sort *= options.priorityMod.low; + result.sort += options.priorityMod.low; return result; }); diff --git a/language/readme.md b/language/readme.md index c889c9d..99a7d69 100644 --- a/language/readme.md +++ b/language/readme.md @@ -30,7 +30,7 @@ Some tag classes impact the parser's behavior. For example, the input text to deconjugate a noun with the verb 「する」 back into the stem. 
Other uses of this behavior include more accurate automatic kanji reading -generation, for example 「城」 b:ing read as 「じょう」 in 「ハイラル城」 +generation, for example 「城」 being read as 「じょう」 in 「ハイラル城」 because 「ハイラル」 has the tag `name:place` in the database, and 「城(じょう)」 has `class:suffix`, while 「城(しろ)」 has `class:noun`. @@ -44,7 +44,7 @@ instead of 「トト湖(こ)」 as an expression to fix the reading of the kanji If Yomikun doesn't generate the correct reading, and the reading isn't based on natural language context (=a computer *could* accurately decide which reading is correct based on other words/tags in the sentence), please submit a pull -request with the sentence and it's (expected) reading. An example of a +request with the sentence and its (expected) reading. An example of a non-deterministic reading is 「何」 in the sentence 「何できた?」 which can be read as both 「なん」 in which case 「何で」 turns into a single word, or 「なに」 where 「何」 is a regular word and 「で」 is particle. diff --git a/language/tags.ts b/language/tags.ts index 4205e72..7f5757f 100644 --- a/language/tags.ts +++ b/language/tags.ts @@ -6,18 +6,21 @@ export const Tag = { Class: { /** @constant verb subgroup */ Verb: { - /** @constant noun that can be conjugated into a verb by adding する */ + /** @constant noun that can be conjugated into a verb by adding する and する itself */ Suru: "class:verb:suru", /** - * @constant verb stored as conjugated noun in database + * @constant verb stored as conjugated noun in database (nominal verb) * - * some dictionaries do this, also used internally to represent - * conjugation if found for suru-verb + * @deprecated The use of conjugated forms in dictionaries is discouraged. + * + * This tag is added by the deconjugation code to check for a legal + * deconjugation if する has been deconjugated away for a word marked + * suru-verb. 
*/ SuruIncluded: "class:verb:suru-included", - /** @constant godan verbs (〜う in [taekim]) */ + /** @constant 〜う verbs in [taekim] (godan) */ U: "class:verb:u", - /** @constant ichidan verbs (〜る in [taekim]) */ + /** @constant 〜る verbs in [taekim] (ichidan) */ Ru: "class:verb:ru", /** @constant kuru (来る) */ Kuru: "class:verb:kuru", @@ -101,12 +104,14 @@ export const Tag = { Na: "infl:reason:adj:na", }, }, - /** @constant passive form (e.g. 言われる) */ + /** @constant makes a verb usable without specifying who carries it out (e.g. 言われる) */ Passive: "infl:passive", /** @constant indicates that a verb *can* happen (e.g. 落ちられる) */ Potential: "infl:potential", - Causative: "infl:causative", // TODO: jsdoc this - Imperative: "infl:imperative", + /** @constant indicates that someone makes a verb happen (e.g. 食べさせる) */ + Causative: "infl:causative", + /** @constant imperative form (e.g. 聞け) */ + Command: "infl:command", }, /** @constant uncategorized tags */ Auxiliary: { diff --git a/readme.md b/readme.md index 7986f54..2356e56 100644 --- a/readme.md +++ b/readme.md @@ -52,9 +52,12 @@ NONE OF THESE ARE IMPLEMENTED YET Some general documentation is done in markdown, but other general documentation should be done in JSDoc format in the corresponding code files. The -documentation also makes frequent references to [Tae Kim's Japanese grammar -guide][taekim], which is abbreviated to [taekim] instead of copying the link -into the source code each time. +documentation also makes frequent references to, and uses terminology from, [Tae +Kim's Japanese grammar guide][taekim], which is abbreviated to [taekim] instead +of copying the link into the source code each time. Tae Kim uses slightly +different terms for grammatical concepts. The 'Tae Kim-version' of these terms +is used for named constants in code. See [tags.ts](language/tags.ts) for an +overview of relevant grammatical terms for the Yomikun parser. 
## The dream diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index 04df2bd..4bff5e3 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -25,7 +25,11 @@ export default [ { input: "取らせない", tags: [ Inflection.Negative, Inflection.Causative ], }, { input: "取らせられる", tags: [ Inflection.Causative, Inflection.Passive ], }, { input: "取らせられない", tags: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], }, - { input: "取れ", tags: [ Inflection.Imperative ], }, - { input: "取るな", tags: [ Inflection.Negative, Inflection.Imperative ], }, + { input: "取れ", tags: [ Inflection.Command ], }, + { input: "取るな", tags: [ Inflection.Negative, Inflection.Command ], }, + // TODO: りゃ for いることは + // TODO: じゃ for では + // TODO: なきゃ + なくちゃ + // and more! ] satisfies Test[]; diff --git a/test/reading/cases.ts b/test/reading/cases.ts index 9fe916f..e6b0787 100644 --- a/test/reading/cases.ts +++ b/test/reading/cases.ts @@ -6,6 +6,7 @@ interface Test { }; export default [ + // BEGIN BULK IMPORT (ANKI) { input: "家の主をなめるなよ…", reading: "うちのあるじをなめるなよ…", @@ -9461,5 +9462,12 @@ export default [ reading: "かゆいところはないかな?", output: "かゆいところはないかな?", tags: [ "お兄ちゃんはおしまい!" ] - } + }, + // END BULK IMPORT (ANKI) + { // お兄ちゃんはおしまい! episode 09 @ 06:46 + input: "ええ~ デート?\n違わい!", + reading: "ええ~ デート?\nちがわい!", + output: "ええ~ デート?\n[違](ちが)わい!", + tags: [ "お兄ちゃんはおしまい!" ] + }, ] satisfies Test[]; -- cgit v1.2.3