about summary refs log tree commit diff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-09 00:50:58 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-09 00:50:58 +0200
commit 1138ac8fc8764cf5cd987383a7a0332879be6cca (patch)
tree 63aa25275e866d986c2b532f1f050c4f2cf99ac1
parent d36cefb50ddf67daa08a221d2de4d3eaae9e2492 (diff)
rename conditionals and deconjugate obligatory inflections
-rw-r--r-- db/dict/deinflections.sql 26
-rw-r--r-- import/jmdict/jmdict.ts 1
-rw-r--r-- language/tags.ts 35
-rw-r--r-- test/deinflection/cases.ts 4
4 files changed, 53 insertions, 13 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index a6070a8..7d67b02 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -142,17 +142,23 @@ insert into deinflection_temp values
('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception
-- conditionals <https://guidetojapanese.org/learn/grammar/conditionals>
- ('infl:suffix:ba', 'えば', 'う', 'a', 'u'),
- ('infl:suffix:ba', 'けば', 'く', 'a', 'u'),
- ('infl:suffix:ba', 'げば', 'ぐ', 'a', 'u'),
- ('infl:suffix:ba', 'せば', 'す', 'a', 'u'),
- ('infl:suffix:ba', 'てば', 'つ', 'a', 'u'),
- ('infl:suffix:ba', 'ねば', 'ぬ', 'a', 'u'),
- ('infl:suffix:ba', 'べば', 'ぶ', 'a', 'u'),
- ('infl:suffix:ba', 'めば', 'む', 'a', 'u'),
- ('infl:suffix:ba', 'れば', 'る', 'a', 'u ru'),
- ('infl:suffix:ba', 'ければ', 'い', 'a', 'a'),
+ ('infl:cond:ba', 'えば', 'う', 'a', 'u'),
+ ('infl:cond:ba', 'けば', 'く', 'a', 'u'),
+ ('infl:cond:ba', 'げば', 'ぐ', 'a', 'u'),
+ ('infl:cond:ba', 'せば', 'す', 'a', 'u'),
+ ('infl:cond:ba', 'てば', 'つ', 'a', 'u'),
+ ('infl:cond:ba', 'ねば', 'ぬ', 'a', 'u'),
+ ('infl:cond:ba', 'べば', 'ぶ', 'a', 'u'),
+ ('infl:cond:ba', 'めば', 'む', 'a', 'u'),
+ ('infl:cond:ba', 'れば', 'る', 'a', 'u ru'),
+ ('infl:cond:ba', 'ければ', 'い', 'a', 'a'),
-- TODO: 〜であれば (deconjugates to です i think?)
+ ('infl:cond:ra', 'ら', '', 'a', 'a'), -- TODO: constrain valid matches to after past conjugation?
+
+ -- obligation <https://guidetojapanese.org/learn/grammar/must>
+ ('infl:must infl:negative', 'はだめ', '', 'a', 'a'), -- built-in negative because だめ can't be deconjugated
+ ('infl:must', 'はいける', '', 'a', 'a'), -- はいけない -> positive
+ ('infl:must', 'はなる', '', 'a', 'a'), -- はならない -> positive
-- auxiliary rules
('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts
index 6109c9b..1d14ade 100644
--- a/import/jmdict/jmdict.ts
+++ b/import/jmdict/jmdict.ts
@@ -21,6 +21,7 @@ const tagLookup = {
["class/v1"]: Tag.Class.Verb.Ru,
["class/v5"]: Tag.Class.Verb.U,
["class/v5k"]: Tag.Class.Verb.U,
+ ["class/v5k-s"]: Tag.Class.Verb.U,
["class/v5uru"]: Tag.Class.Verb.U,
["class/v5r-i"]: Tag.Class.Verb.U,
["class/v5u-s"]: Tag.Class.Verb.U,
diff --git a/language/tags.ts b/language/tags.ts
index a9fc5ca..312a594 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -67,6 +67,15 @@ export const Tag = {
* e.g. 来ない -> 来る [infl:negative]
*/
Inflection: {
+ /**
+ * @constant affirmative conjugations
+ *
+ * This tag should not be added by any deconjugation rule; it is instead
+ * derived from the number of negations. An even count of negative
+ * inflections (including 0) adds this tag, while an odd count adds
+ * infl:negative instead.
+ */
+ Affirmative: "infl:affirmative",
/** @constant negative conjugations */
Negative: "infl:negative",
/** @constant time-related conjugations */
@@ -89,8 +98,6 @@ export const Tag = {
Te: "infl:suffix:te",
/** @constant -tari ending (e.g. 遊んだり) */
Tari: "infl:suffix:tari",
- /** @constant -ba ending for conditionals (e.g. 泳げれば)*/
- Ba: "infl:suffix:ba",
},
/** @constant internal deinflection rules */
Reason: {
@@ -116,6 +123,15 @@ export const Tag = {
Causative: "infl:causative",
/** @constant imperative form (e.g. 聞け) */
Command: "infl:command",
+ /** @constant conditional forms */
+ Conditional: {
+ /** @constant -ba ending (e.g. 泳げれば) */
+ Ba: "infl:cond:ba",
+ /** @constant -ra ending (e.g. 取ったら) */
+ Ra: "infl:cond:ra",
+ },
+ /** @constant obligation forms, i.e. must / must not (e.g. 入ってはいけない) */
+ Obliged: "infl:must",
},
/** @constant uncategorized tags */
Auxiliary: {
@@ -140,16 +156,31 @@ export type TokenTags = Array<TokenTag>;
export function parseTags(input: string) {
var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[];
var filteredTags: TokenTag[] = [];
+ var negationCount = 0;
for (var tag of tags) {
+ // conjugations that are used as "stepping stones" for others should be
+ // filtered in this loop. checking if a combination of tags is valid should
+ // be done in ./parser.ts
+
// skip past tense tag if used as step for -te and -tari inflection
if (tag == Tag.Inflection.Tense.Past &&
filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
// skip -te suffix tag if it's a base for continuous tense
if (tag == Tag.Inflection.Suffix.Te &&
filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue;
+ // skip -te suffix tag if it's a base for obligatory inflection
+ if (tag == Tag.Inflection.Suffix.Te &&
+ filteredTags.anyOf([Tag.Inflection.Obliged])) continue;
+
+ // count Inflection.Negative tags instead of keeping them; after the loop the count is collapsed into a single Inflection.Affirmative (even count) or Inflection.Negative (odd count)
+ if (tag == Tag.Inflection.Negative) {
+ negationCount++;
+ continue;
+ }
filteredTags.push(tag);
}
+ filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative);
return filteredTags.set().arr() as TokenTags; // make sure array doesn't contain duplicates
}
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index e0b2137..143e2a4 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -38,8 +38,10 @@ export default [
{ input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], },
{ input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], },
{ input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], },
- { input: "泳げれば", mustHave: [ Inflection.Suffix.Ba ], mustNotHave: [], },
+ { input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], },
{ input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], },
+ { input: "入ってはいけない", mustHave: [ Inflection.Obliged, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~
+ { input: "行かなくてはなりません", mustHave: [ Inflection.Obliged, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~
// TODO: りゃ for いることは
// TODO: じゃ for では
// TODO: なきゃ + なくちゃ