aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-09 18:00:33 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-09 18:00:33 +0200
commit 80b46d2f6152129f25f5734e4960cb7c15edfcf0 (patch)
tree 191c528e5ed18c6c85091b5dadb152231cb857aa
parent 1138ac8fc8764cf5cd987383a7a0332879be6cca (diff)
more deinflections + tests
-rw-r--r-- db/dict/deinflections.sql 78
-rw-r--r-- import/jmdict/jmdict.ts 1
-rw-r--r-- language/tags.ts 56
-rw-r--r-- test/deinflection/cases.ts 32
4 files changed, 142 insertions, 25 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index 7d67b02..a4a35d5 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -142,23 +142,71 @@ insert into deinflection_temp values
('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception
-- conditionals <https://guidetojapanese.org/learn/grammar/conditionals>
- ('infl:cond:ba', 'えば', 'う', 'a', 'u'),
- ('infl:cond:ba', 'けば', 'く', 'a', 'u'),
- ('infl:cond:ba', 'げば', 'ぐ', 'a', 'u'),
- ('infl:cond:ba', 'せば', 'す', 'a', 'u'),
- ('infl:cond:ba', 'てば', 'つ', 'a', 'u'),
- ('infl:cond:ba', 'ねば', 'ぬ', 'a', 'u'),
- ('infl:cond:ba', 'べば', 'ぶ', 'a', 'u'),
- ('infl:cond:ba', 'めば', 'む', 'a', 'u'),
- ('infl:cond:ba', 'れば', 'る', 'a', 'u ru'),
- ('infl:cond:ba', 'ければ', 'い', 'a', 'a'),
+ ('infl:cond:ba', 'えば', 'う', 'nt', 'u'),
+ ('infl:cond:ba', 'けば', 'く', 'nt', 'u'),
+ ('infl:cond:ba', 'げば', 'ぐ', 'nt', 'u'),
+ ('infl:cond:ba', 'せば', 'す', 'nt', 'u'),
+ ('infl:cond:ba', 'てば', 'つ', 'nt', 'u'),
+ ('infl:cond:ba', 'ねば', 'ぬ', 'nt', 'u'),
+ ('infl:cond:ba', 'べば', 'ぶ', 'nt', 'u'),
+ ('infl:cond:ba', 'めば', 'む', 'nt', 'u'),
+ ('infl:cond:ba', 'れば', 'る', 'nt', 'u ru'),
+ ('infl:cond:ba', 'ければ', 'い', 'nt', 'a'),
-- TODO: 〜であれば (deconjugates to です i think?)
('infl:cond:ra', 'ら', '', 'a', 'a'), -- TODO: constrain valid matches to after past conjugation?
-- obligation <https://guidetojapanese.org/learn/grammar/must>
- ('infl:must infl:negative', 'はだめ', '', 'a', 'a'), -- built-in negative because だめ can't be deconjugated
- ('infl:must', 'はいける', '', 'a', 'a'), -- はいけない -> positive
- ('infl:must', 'はなる', '', 'a', 'a'), -- はならない -> positive
+ ('infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
+ ('infl:tmp:must:res', 'いける', '', 'a', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past)
+ ('infl:tmp:must:res', 'なる', '', 'a', 'ot'), -- はならない -> positive
+ ('infl:must', 'は', '', 'ot', 'a'), -- removes particle (negative -te + は + だめ/いけない/ならない)
+ ('infl:tmp:must:prt infl:must', 'と', '', 'ot', 'a'), -- removes particle (negative + と + だめ/いけない/ならない)
+ ('infl:must', 'ば', 'ば', 'ot', 'a'), -- removes ot rule, keeps 〜ば (negative + ば + だめ/いけない/ならない)
+ ('infl:must infl:tmp:must:prt', 'なくちゃ', 'なくて', 'a', 'a'), -- colloquial abbreviation
+ ('infl:must infl:tmp:must:prt', 'なきゃ', 'なければ', 'a', 'a'), -- colloquial abbreviation
+ ('infl:tmp:must:prt', 'ちゃ', 'ては', 'a', 'a'), -- colloquial abbreviation
+ ('infl:tmp:must:prt', 'じゃ', 'では', 'a', 'a'), -- colloquial abbreviation
+
+ -- ~tai endings <https://guidetojapanese.org/learn/grammar/desire>
+ ('infl:desire:itai', 'たい', 'る', 'i', 'ru'),
+ ('infl:desire:itai', 'いたい', 'う', 'i', 'u'),
+ ('infl:desire:itai', 'きたい', 'く', 'i', 'u'),
+ ('infl:desire:itai', 'ぎたい', 'ぐ', 'i', 'u'),
+ ('infl:desire:itai', 'したい', 'す', 'i', 'u'),
+ ('infl:desire:itai', 'ちたい', 'つ', 'i', 'u'),
+ ('infl:desire:itai', 'にたい', 'ぬ', 'i', 'u'),
+ ('infl:desire:itai', 'びたい', 'ぶ', 'i', 'u'),
+ ('infl:desire:itai', 'みたい', 'む', 'i', 'u'),
+ ('infl:desire:itai', 'りたい', 'る', 'i', 'u'),
+ ('infl:desire:itai', 'したい', 'する', 'i', 's'),
+ ('infl:desire:itai', 'きたい', 'くる', 'i', 'k'),
+ ('infl:desire:itai', '来たい', '来る', 'i', 'k'),
+
+ -- volitional form <https://guidetojapanese.org/learn/grammar/desire>
+ ('infl:desire:volitional', 'よう', 'る', 'a', 'ru'),
+ ('infl:desire:volitional', 'おう', 'う', 'a', 'u'),
+ ('infl:desire:volitional', 'こう', 'く', 'a', 'u'),
+ ('infl:desire:volitional', 'ごう', 'ぐ', 'a', 'u'),
+ ('infl:desire:volitional', 'そう', 'す', 'a', 'u'),
+ ('infl:desire:volitional', 'とう', 'つ', 'a', 'u'),
+ ('infl:desire:volitional', 'のう', 'ぬ', 'a', 'u'),
+ ('infl:desire:volitional', 'ぼう', 'ぶ', 'a', 'u'),
+ ('infl:desire:volitional', 'もう', 'む', 'a', 'u'),
+ ('infl:desire:volitional', 'ろう', 'る', 'a', 'u'),
+ ('infl:desire:volitional', 'しよう', 'する', 'a', 's'),
+ ('infl:desire:volitional', 'こよう', 'くる', 'a', 'k'),
+ ('infl:desire:volitional', '来よう', '来る', 'a', 'k'),
+
+ -- polite volitional <https://guidetojapanese.org/learn/grammar/desire>
+ ('infl:desire:volitional', 'ましょう', 'ます', 'nt', 'nt'),
+
+ -- TODO: making suggestions using the 「ば」 or 「たら」 conditional
+
+ -- attempt (〜みる) <https://guidetojapanese.org/learn/grammar/try>
+ ('infl:attempt:miru', 'みる', '', 'ru', 'a'),
+
+ -- attempt (〜とする) <https://guidetojapanese.org/learn/grammar/try>
+ ('infl:attempt:tosuru', 'とする', '', 's', 'a'),
-- auxiliary rules
('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
@@ -175,7 +223,9 @@ insert into rule_map values
(null, 'z', 1 << 4), -- ずる (zuru)
('infl:reason:adj:i', 'i', 1 << 5), -- 形容詞 (i-adjective)
(null, 'iru', 1 << 6), -- 〜いる (temporary iru for progressive tense)
- ('infl:reason:adj:na', 'na', 1 << 7); -- 形容動詞 (na-adjective)
+ ('infl:reason:adj:na', 'na', 1 << 7), -- 形容動詞 (na-adjective)
+ (null, 'ot', 1 << 8), -- (temporary rule for deconjugating obligatory endings)
+ (null, 'nt', (1 << 6) - 1); -- (utility "no temp" mask: bits 0-5 only, so it excludes the temporary iru/ot rules and also na)
-- add tags to db
insert into deinflection_rules (mask, tag)
diff --git a/import/jmdict/jmdict.ts b/import/jmdict/jmdict.ts
index 1d14ade..155c423 100644
--- a/import/jmdict/jmdict.ts
+++ b/import/jmdict/jmdict.ts
@@ -18,6 +18,7 @@ const tagLookup = {
["misc/uk"]: Tag.Auxiliary.UsuallyKana,
["class/adv"]: Tag.Class.Adverb,
["class/vs"]: Tag.Class.Verb.Suru,
+ ["class/vs-i"]: Tag.Class.Verb.Suru,
["class/v1"]: Tag.Class.Verb.Ru,
["class/v5"]: Tag.Class.Verb.U,
["class/v5k"]: Tag.Class.Verb.U,
diff --git a/language/tags.ts b/language/tags.ts
index 312a594..de1a3ea 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -131,7 +131,28 @@ export const Tag = {
Ra: "infl:cond:ra",
},
/** @constant makes a verb obligatory (e.g. 入ってはいけない) */
- Obliged: "infl:must",
+ Obligatory: "infl:must",
+ /** @constant verbs that someone wants to do / be done */
+ Desirable: {
+ /** @constant 〜たい endings (e.g. 買いたい) */
+ Itai: "infl:desire:itai",
+ /** @constant 〜おう endings (e.g. 寝よう) */
+ Volitional: "infl:desire:volitional",
+ },
+ /** @constant makes a verb an attempt */
+ Attempt: {
+ /** @constant 〜みる to try something out (e.g. 飲んでみた) */
+ Miru: "infl:attempt:miru",
+ /** @constant 〜とする attempts (e.g. 入ろうとしている) */
+ ToSuru: "infl:attempt:tosuru",
+ },
+ /** @constant temporary tags (removed by parseTags) */
+ Temporary: {
+ /** @constant particle of obligatory conjugation (e.g. 行かない*と*だめ), or colloquial abbreviation */
+ ObligatoryParticle: "infl:tmp:must:prt",
+ /** @constant resulting action part of obligatory conjugation (e.g. 行かないと*だめ*) */
+ ObligatoryResult: "infl:tmp:must:res",
+ },
},
/** @constant uncategorized tags */
Auxiliary: {
@@ -165,12 +186,17 @@ export function parseTags(input: string) {
// skip past tense tag if used as step for -te and -tari inflection
if (tag == Tag.Inflection.Tense.Past &&
filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
- // skip -te suffix tag if it's a base for continuous tense
- if (tag == Tag.Inflection.Suffix.Te &&
- filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue;
- // skip -te suffix tag if it's a base for obligatory inflection
- if (tag == Tag.Inflection.Suffix.Te &&
- filteredTags.anyOf([Tag.Inflection.Obliged])) continue;
+
+ // skip -te suffix tag if it is only used as one of the following:
+ if (tag == Tag.Inflection.Suffix.Te && filteredTags.anyOf([
+ Tag.Inflection.Tense.Continuous, // base for continuous tense
+ Tag.Inflection.Obligatory, // base for obligatory inflection
+ Tag.Inflection.Attempt.Miru, // base for 〜みる attempt
+ ])) continue;
+
+ // skip volitional tag if used for 〜とする attempt
+ if (tag == Tag.Inflection.Desirable.Volitional &&
+ filteredTags.anyOf([Tag.Inflection.Attempt.ToSuru])) continue;
// normalize multiple Inflection.Negative to single Inflection.Affirmative or Inflection.Negative
if (tag == Tag.Inflection.Negative) {
@@ -180,7 +206,21 @@ export function parseTags(input: string) {
filteredTags.push(tag);
}
+
+ // negative + と without resulting action = implicit affirmative obligatory
+ if (filteredTags.includes(Tag.Inflection.Temporary.ObligatoryParticle) &&
+ !filteredTags.includes(Tag.Inflection.Temporary.ObligatoryResult)) {
+ negationCount = 0; // -> make resulting tags affirmative
+ }
+
+ // normalize affirmative/negative
filteredTags.push(negationCount % 2 == 0 ? Tag.Inflection.Affirmative : Tag.Inflection.Negative);
- return filteredTags.set().arr() as TokenTags; // make sure array doesn't contain duplicates
+
+ // filter any remaining temporary tags
+ type tempTag = typeof Tag.Inflection.Temporary[keyof typeof Tag.Inflection.Temporary];
+ filteredTags = filteredTags.filter(t => !Object.values(Tag.Inflection.Temporary).includes(t as tempTag));
+
+ // filter any duplicates
+ return filteredTags.set().arr() as TokenTags;
}
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index 143e2a4..a54dba9 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -39,9 +39,35 @@ export default [
{ input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], },
{ input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], },
{ input: "泳げれば", mustHave: [ Inflection.Conditional.Ba ], mustNotHave: [], },
- { input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], },
- { input: "入ってはいけない", mustHave: [ Inflection.Obliged, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~
- { input: "行かなくてはなりません", mustHave: [ Inflection.Obliged, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~
+ { input: "取らなければ", mustHave: [ Inflection.Conditional.Ba, Inflection.Negative ], mustNotHave: [], },
+ { input: "入ってはいけない", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], }, // obliged + negative = must not ~
+ { input: "行かなくてはなりません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], }, // obliged + affirmative = must ~
+ { input: "しなくちゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "食べなきゃ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "入っちゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], },
+ { input: "死んじゃだめ", mustHave: [ Inflection.Obligatory, Inflection.Negative ], mustNotHave: [ Inflection.Affirmative ], },
+ { input: "しなくてはいけなかった", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "行かないとだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "しないといけない", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "行かなければいけません", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "しなければだめ", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "行かないと", mustHave: [ Inflection.Obligatory, Inflection.Affirmative ], mustNotHave: [ Inflection.Negative ], },
+ { input: "買いたい", mustHave: [ Inflection.Desirable.Itai ], mustNotHave: [], },
+ { input: "寝よう", mustHave: [ Inflection.Desirable.Volitional ], mustNotHave: [], },
+ { input: "しましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "寝ましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "行きましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "遊びましょう", mustHave: [ Inflection.Desirable.Volitional, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "食べてみた", mustHave: [ Inflection.Attempt.Miru, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ], },
+ { input: "飲んでみました", mustHave: [ Inflection.Attempt.Miru, Inflection.Tense.Past, Inflection.Polite.Masu ], mustNotHave: [ Inflection.Suffix.Te ], },
+ { input: "食べてみたい", mustHave: [ Inflection.Attempt.Miru, Inflection.Desirable.Itai ], mustNotHave: [ Inflection.Suffix.Te ], },
+ { input: "切ってみなかった", mustHave: [ Inflection.Attempt.Miru, Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [ Inflection.Suffix.Te ] },
+ { input: "見ようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
+ { input: "行こうとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
+ { input: "避けようとする", mustHave: [ Inflection.Attempt.ToSuru ], mustNotHave: [ Inflection.Desirable.Volitional ], },
+ { input: "入ろうとしている", mustHave: [ Inflection.Attempt.ToSuru, Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Desirable.Volitional ], },
// TODO: りゃ for いることは
// TODO: じゃ for では
// TODO: なきゃ + なくちゃ