aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-07 00:50:25 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-07 00:50:25 +0200
commit 92bc1ed78859984486336d95641ddbdca8d02841 (patch)
tree a2c0659815cbf7b5d7f9cc6ab9329ac5ed72461b
parent cd01df3747ff361fab819fd1d30fac1dba6240e1 (diff)
small updates
-rw-r--r-- language/parser.ts 6
-rw-r--r-- language/readme.md 4
-rw-r--r-- language/tags.ts 23
-rw-r--r-- readme.md 9
-rw-r--r-- test/deinflection/cases.ts 8
-rw-r--r-- test/reading/cases.ts 10
6 files changed, 40 insertions, 20 deletions
diff --git a/language/parser.ts b/language/parser.ts
index 6398595..7fd3981 100644
--- a/language/parser.ts
+++ b/language/parser.ts
@@ -30,7 +30,7 @@ export default class Parser {
depth: optional?.depth ?? ParseDepth.Term,
priorityMod: {
high: optional?.priorityMod?.high ?? 10,
- low: optional?.priorityMod?.low ?? 0.1,
+ low: optional?.priorityMod?.low ?? -10,
},
breaks: optional?.breaks ?? [],
}
@@ -95,12 +95,12 @@ export default class Parser {
// give higher priority to suffixes when last token was a name, else lower priority
if (result.tags.includes(Tag.Class.Suffix))
- result.sort *= lastTokenName ? options.priorityMod.high : options.priorityMod.low;
+ result.sort += lastTokenName ? options.priorityMod.high : options.priorityMod.low;
// give lower priority to terms matched only by their readings, and are
// usually written in kanji
if (!result.tags.includes(Tag.Auxiliary.UsuallyKana) && !result.match.kanji)
- result.sort *= options.priorityMod.low;
+ result.sort += options.priorityMod.low;
return result;
});
diff --git a/language/readme.md b/language/readme.md
index c889c9d..99a7d69 100644
--- a/language/readme.md
+++ b/language/readme.md
@@ -30,7 +30,7 @@ Some tag classes impact the parser's behavior. For example, the input text
to deconjugate a noun with the verb 「する」 back into the stem.
Other uses of this behavior include more accurate automatic kanji reading
-generation, for example 「城」 b:ing read as 「じょう」 in 「ハイラル城」
+generation, for example 「城」 being read as 「じょう」 in 「ハイラル城」
because 「ハイラル」 has the tag `name:place` in the database, and
「城(じょう)」 has `class:suffix`, while 「城(しろ)」 has `class:noun`.
@@ -44,7 +44,7 @@ instead of 「トト湖(こ)」 as an expression to fix the reading of the kanji
If Yomikun doesn't generate the correct reading, and the reading isn't based on
natural language context (=a computer *could* accurately decide which reading
is correct based on other words/tags in the sentence), please submit a pull
-request with the sentence and it's (expected) reading. An example of a
+request with the sentence and its (expected) reading. An example of a
non-deterministic reading is 「何」 in the sentence 「何できた?」 which can be
read as both 「なん」 in which case 「何で」 turns into a single word, or
「なに」 where 「何」 is a regular word and 「で」 is particle.
diff --git a/language/tags.ts b/language/tags.ts
index 4205e72..7f5757f 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -6,18 +6,21 @@ export const Tag = {
Class: {
/** @constant verb subgroup */
Verb: {
- /** @constant noun that can be conjugated into a verb by adding する */
+ /** @constant noun that can be conjugated into a verb by adding する, and する itself */
Suru: "class:verb:suru",
/**
- * @constant verb stored as conjugated noun in database
+ * @constant verb stored as conjugated noun in database (nominal verb)
*
- * some dictionaries do this, also used internally to represent
- * conjugation if found for suru-verb
+ * @deprecated The use of conjugated forms in dictionaries is discouraged.
+ *
+ * This tag is added by the deconjugation code to check for a legal
+ * deconjugation if する has been deconjugated away for a word marked
+ * suru-verb.
*/
SuruIncluded: "class:verb:suru-included",
- /** @constant godan verbs (〜う in [taekim]) */
+ /** @constant 〜う verbs in [taekim] (godan) */
U: "class:verb:u",
- /** @constant ichidan verbs (〜る in [taekim]) */
+ /** @constant 〜る verbs in [taekim] (ichidan) */
Ru: "class:verb:ru",
/** @constant kuru (来る) */
Kuru: "class:verb:kuru",
@@ -101,12 +104,14 @@ export const Tag = {
Na: "infl:reason:adj:na",
},
},
- /** @constant passive form (e.g. 言われる) */
+ /** @constant makes a verb usable without specifying who carries it out (e.g. 言われる) */
Passive: "infl:passive",
/** @constant indicates that a verb *can* happen (e.g. 落ちられる) */
Potential: "infl:potential",
- Causative: "infl:causative", // TODO: jsdoc this
- Imperative: "infl:imperative",
+ /** @constant indicates that someone makes a verb happen (e.g. 食べさせる) */
+ Causative: "infl:causative",
+ /** @constant imperative form (e.g. 聞け) */
+ Command: "infl:command",
},
/** @constant uncategorized tags */
Auxiliary: {
diff --git a/readme.md b/readme.md
index 7986f54..2356e56 100644
--- a/readme.md
+++ b/readme.md
@@ -52,9 +52,12 @@ NONE OF THESE ARE IMPLEMENTED YET
Some general documentation is done in markdown, but other general documentation
should be done in JSDoc format in the corresponding code files. The
-documentation also makes frequent references to [Tae Kim's Japanese grammar
-guide][taekim], which is abbreviated to [taekim] instead of copying the link
-into the source code each time.
+documentation also makes frequent references to, and uses terminology from, [Tae
+Kim's Japanese grammar guide][taekim], which is abbreviated to [taekim] instead
+of copying the link into the source code each time. Tae Kim uses slightly
+different terms for grammatical concepts. The 'Tae Kim-version' of these terms
+is used for named constants in code. See [tags.ts](language/tags.ts) for an
+overview of relevant grammatical terms for the Yomikun parser.
## The dream
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index 04df2bd..4bff5e3 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -25,7 +25,11 @@ export default [
{ input: "取らせない", tags: [ Inflection.Negative, Inflection.Causative ], },
{ input: "取らせられる", tags: [ Inflection.Causative, Inflection.Passive ], },
{ input: "取らせられない", tags: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], },
- { input: "取れ", tags: [ Inflection.Imperative ], },
- { input: "取るな", tags: [ Inflection.Negative, Inflection.Imperative ], },
+ { input: "取れ", tags: [ Inflection.Command ], },
+ { input: "取るな", tags: [ Inflection.Negative, Inflection.Command ], },
+ // TODO: りゃ for いることは
+ // TODO: じゃ for では
+ // TODO: なきゃ + なくちゃ
+ // and more!
] satisfies Test[];
diff --git a/test/reading/cases.ts b/test/reading/cases.ts
index 9fe916f..e6b0787 100644
--- a/test/reading/cases.ts
+++ b/test/reading/cases.ts
@@ -6,6 +6,7 @@ interface Test {
};
export default [
+ // BEGIN BULK IMPORT (ANKI)
{
input: "家の主をなめるなよ…",
reading: "うちのあるじをなめるなよ…",
@@ -9461,5 +9462,12 @@ export default [
reading: "かゆいところはないかな?",
output: "かゆいところはないかな?",
tags: [ "お兄ちゃんはおしまい!" ]
- }
+ },
+ // END BULK IMPORT (ANKI)
+ { // お兄ちゃんはおしまい! episode 09 @ 06:46
+ input: "ええ~ デート?\n違わい!",
+ reading: "ええ~ デート?\nちがわい!",
+ output: "ええ~ デート?\n[違](ちが)わい!",
+ tags: [ "お兄ちゃんはおしまい!" ]
+ },
] satisfies Test[];