aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-07-08 23:43:14 +0200
committerlonkaars <loek@pipeframe.xyz>2023-07-08 23:43:14 +0200
commitd36cefb50ddf67daa08a221d2de4d3eaae9e2492 (patch)
treebf8139bd668f1d99304f3f92ec01e9172a67e463
parent92bc1ed78859984486336d95641ddbdca8d02841 (diff)
more deinflections
-rw-r--r--db/dict/deinflections.sql38
-rw-r--r--db/find.sql6
-rw-r--r--language/tags.ts9
-rw-r--r--readme.md2
-rw-r--r--test/deinflection/cases.ts55
-rw-r--r--test/deinflection/test.ts13
6 files changed, 91 insertions, 32 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index ff177e2..a6070a8 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -15,7 +15,7 @@ insert into deinflection_temp values
('infl:negative', 'しない', 'する', 'a', 's'),
('infl:negative', 'こない', 'くる', 'a', 'k'),
('infl:negative', '来ない', '来る', 'a', 'k'),
- ('infl:negative', 'ない', 'ある', 'a', 'ru'), -- this one may cause problems (?)
+ -- ('infl:negative', 'ない', 'ある', 'a', 'ru'), -- this one may cause problems (?)
-- ('infl:negative', 'ない', '', 'a', 'ru'), -- this one may cause problems (?)
-- past tense <https://guidetojapanese.org/learn/grammar/past_tense>
@@ -33,6 +33,7 @@ insert into deinflection_temp values
('infl:tense:past', 'きた', 'くる', 'a', 'k'),
('infl:tense:past', '来た', 'くる', 'a', 'k'),
('infl:tense:past', '行った', '行く', 'a', ''),
+ ('infl:tense:past', 'かった', 'い', 'a', 'a'), -- i-adjective past; also covers past negative (〜なかった)
-- adjective to adverb <https://guidetojapanese.org/learn/grammar/adverbs>
('infl:adverb', 'く', 'い', 'a', 'i'),
@@ -104,7 +105,8 @@ insert into deinflection_temp values
('infl:suffix:te', 'って', 'った', 'a', 'u'),
('infl:suffix:te', 'きて', 'きた', 'a', 'k'),
('infl:suffix:te', '来て', '来た', 'a', 'k'),
- ('infl:suffix:te', 'くて', 'い', 'a', ''), -- TODO: rules_out of this one is i?
+ ('infl:suffix:te', 'くて', 'い', 'a', 'a'),
+ ('infl:suffix:te', 'よくて', 'いい', 'a', 'a'), -- exception
-- -tari lists <https://guidetojapanese.org/learn/grammar/compound>
('infl:suffix:tari', 'たり', 'た', 'a', 'ru'),
@@ -120,6 +122,38 @@ insert into deinflection_temp values
('infl:suffix:sa class:noun', 'さ', '', 'a', 'na'),
('infl:suffix:sa class:noun', 'さ', 'い', 'a', 'i'),
+ -- continuous tense <https://guidetojapanese.org/learn/grammar/teform>
+ ('infl:tense:cont', 'いる', '', 'a', 'a'),
+
+ -- potential form <https://guidetojapanese.org/learn/grammar/potential>
+ ('infl:potential', 'られる', 'る', 'a', 'ru'),
+ ('infl:potential', 'える', 'う', 'a', 'u'),
+ ('infl:potential', 'ける', 'く', 'a', 'u'),
+ ('infl:potential', 'げる', 'ぐ', 'a', 'u'),
+ ('infl:potential', 'せる', 'す', 'a', 'u'),
+ ('infl:potential', 'てる', 'つ', 'a', 'u'),
+ ('infl:potential', 'ねる', 'ぬ', 'a', 'u'),
+ ('infl:potential', 'べる', 'ぶ', 'a', 'u'),
+ ('infl:potential', 'める', 'む', 'a', 'u'),
+ ('infl:potential', 'れる', 'る', 'a', 'u'),
+ ('infl:potential', 'できる', 'する', 'a', 's'),
+ ('infl:potential', 'こられる', 'くる', 'a', 'k'),
+ ('infl:potential', 'ありうる', 'ある', 'a', ''), -- exception
+ ('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception
+
+ -- conditionals <https://guidetojapanese.org/learn/grammar/conditionals>
+ ('infl:suffix:ba', 'えば', 'う', 'a', 'u'),
+ ('infl:suffix:ba', 'けば', 'く', 'a', 'u'),
+ ('infl:suffix:ba', 'げば', 'ぐ', 'a', 'u'),
+ ('infl:suffix:ba', 'せば', 'す', 'a', 'u'),
+ ('infl:suffix:ba', 'てば', 'つ', 'a', 'u'),
+ ('infl:suffix:ba', 'ねば', 'ぬ', 'a', 'u'),
+ ('infl:suffix:ba', 'べば', 'ぶ', 'a', 'u'),
+ ('infl:suffix:ba', 'めば', 'む', 'a', 'u'),
+ ('infl:suffix:ba', 'れば', 'る', 'a', 'u ru'),
+ ('infl:suffix:ba', 'ければ', 'い', 'a', 'a'),
+ -- TODO: 〜であれば (probably deconjugates to です; needs confirmation)
+
-- auxiliary rules
('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
diff --git a/db/find.sql b/db/find.sql
index cdaebb3..dd6a011 100644
--- a/db/find.sql
+++ b/db/find.sql
@@ -49,7 +49,7 @@ with results(id, expression, reading, tags, depth, rules, original, deinflected)
(length(term) > 0)
limit 50 -- failsafe to catch any infinite loops
)
- select term, tags, depth, substr(:term, 1, deinflect.length), rules_out
+ select term, tags, depth, substr(:term, 1, deinflect.length), rules
from deinflect
)
select
@@ -65,7 +65,7 @@ with results(id, expression, reading, tags, depth, rules, original, deinflected)
inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
inner join term_tag on term_tag.term_id = term.id
inner join tag on term_tag.tag_id = tag.id
- group by term.id, deinflections.original
+ group by term.id, deinflections.original, deinflections.rules
having term.id is not null
)
select
@@ -92,5 +92,5 @@ left join sort_overlay
on (user_overlay.expression = results.expression) and
(user_overlay.reading = results.reading) and
(user_overlay.user_id = (select id from user where username = :user))
-group by results.id, results.original;
+group by results.id, results.original, results.rules;
diff --git a/language/tags.ts b/language/tags.ts
index 7f5757f..a9fc5ca 100644
--- a/language/tags.ts
+++ b/language/tags.ts
@@ -73,6 +73,8 @@ export const Tag = {
Tense: {
/** @constant past tense (e.g. 叩いた) */
Past: "infl:tense:past",
+ /** @constant continuous tense (e.g. 喋っている) */
+ Continuous: "infl:tense:cont",
},
/** @constant adverbs (e.g. 早く) */
Adverb: "infl:adverb",
@@ -87,6 +89,8 @@ export const Tag = {
Te: "infl:suffix:te",
/** @constant -tari ending (e.g. 遊んだり) */
Tari: "infl:suffix:tari",
+ /** @constant -ba ending for conditionals (e.g. 泳げれば) */
+ Ba: "infl:suffix:ba",
},
/** @constant internal deinflection rules */
Reason: {
@@ -137,9 +141,12 @@ export function parseTags(input: string) {
var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[];
var filteredTags: TokenTag[] = [];
for (var tag of tags) {
- // skip past tense tags after -te and -tari deinflection
+ // skip past tense tag if used as step for -te and -tari inflection
if (tag == Tag.Inflection.Tense.Past &&
filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue;
+ // skip -te suffix tag if it's a base for continuous tense
+ if (tag == Tag.Inflection.Suffix.Te &&
+ filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue;
filteredTags.push(tag);
}
diff --git a/readme.md b/readme.md
index 2356e56..60f1eda 100644
--- a/readme.md
+++ b/readme.md
@@ -24,7 +24,7 @@ scope is larger than Yomichan, it's still focused on Japanese only.**
- [ ] add separate kanji readings/info table
- [ ] add separate frequency dictionary
- [ ] add more deinflections to db/deinflections.sql
-- [ ] set up unit tests for sentence reading generation
+- [x] set up unit tests for sentence reading generation
- [x] port server-internal API to simple HTTP JSON API
- [ ] create primitive search page ui
- [ ] add code formatter config
diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts
index 4bff5e3..e0b2137 100644
--- a/test/deinflection/cases.ts
+++ b/test/deinflection/cases.ts
@@ -3,30 +3,43 @@ const { Inflection } = Tag;
interface Test {
input: string;
- tags: TokenTags;
+ mustHave: TokenTags;
+ mustNotHave: TokenTags;
};
export default [
- { input: "取る", tags: [], },
- { input: "取らない", tags: [ Inflection.Negative ], },
- { input: "取ります", tags: [ Inflection.Polite.Masu ], },
- { input: "取りません", tags: [ Inflection.Negative, Inflection.Polite.Masu ], },
- { input: "取った", tags: [ Inflection.Tense.Past ], },
- { input: "取らなかった", tags: [ Inflection.Negative, Inflection.Tense.Past ], },
- { input: "取りました", tags: [ Inflection.Polite.Masu, Inflection.Tense.Past ], },
- { input: "取りませんでした", tags: [ Inflection.Negative, Inflection.Polite.Masu, Inflection.Tense.Past ], },
- { input: "取って", tags: [ Inflection.Suffix.Te ], },
- { input: "取らなくて", tags: [ Inflection.Negative, Inflection.Suffix.Te ], },
- { input: "取れる", tags: [ Inflection.Potential ], },
- { input: "取れない", tags: [ Inflection.Negative, Inflection.Potential ], },
- { input: "取られる", tags: [ Inflection.Passive ], },
- { input: "取られない", tags: [ Inflection.Negative, Inflection.Passive ], },
- { input: "取らせる", tags: [ Inflection.Causative ], },
- { input: "取らせない", tags: [ Inflection.Negative, Inflection.Causative ], },
- { input: "取らせられる", tags: [ Inflection.Causative, Inflection.Passive ], },
- { input: "取らせられない", tags: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], },
- { input: "取れ", tags: [ Inflection.Command ], },
- { input: "取るな", tags: [ Inflection.Negative, Inflection.Command ], },
+ // jisho.org generated conjugations for 取る (u-verb)
+ { input: "取る", mustHave: [], mustNotHave: [], },
+ { input: "取らない", mustHave: [ Inflection.Negative ], mustNotHave: [], },
+ { input: "取ります", mustHave: [ Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "取りません", mustHave: [ Inflection.Negative, Inflection.Polite.Masu ], mustNotHave: [], },
+ { input: "取った", mustHave: [ Inflection.Tense.Past ], mustNotHave: [], },
+ { input: "取らなかった", mustHave: [ Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [], },
+ { input: "取りました", mustHave: [ Inflection.Polite.Masu, Inflection.Tense.Past ], mustNotHave: [], },
+ { input: "取りませんでした", mustHave: [ Inflection.Negative, Inflection.Polite.Masu, Inflection.Tense.Past ], mustNotHave: [], },
+ { input: "取って", mustHave: [ Inflection.Suffix.Te ], mustNotHave: [], },
+ { input: "取らなくて", mustHave: [ Inflection.Negative, Inflection.Suffix.Te ], mustNotHave: [], },
+ { input: "取れる", mustHave: [ Inflection.Potential ], mustNotHave: [], },
+ { input: "取れない", mustHave: [ Inflection.Negative, Inflection.Potential ], mustNotHave: [], },
+ { input: "取られる", mustHave: [ Inflection.Passive ], mustNotHave: [], },
+ { input: "取られない", mustHave: [ Inflection.Negative, Inflection.Passive ], mustNotHave: [], },
+ { input: "取らせる", mustHave: [ Inflection.Causative ], mustNotHave: [], },
+ { input: "取らせない", mustHave: [ Inflection.Negative, Inflection.Causative ], mustNotHave: [], },
+ { input: "取らせられる", mustHave: [ Inflection.Causative, Inflection.Passive ], mustNotHave: [], },
+ { input: "取らせられない", mustHave: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], mustNotHave: [], },
+ { input: "取れ", mustHave: [ Inflection.Command ], mustNotHave: [], },
+ { input: "取るな", mustHave: [ Inflection.Negative, Inflection.Command ], mustNotHave: [], },
+ // other tests
+ { input: "取ったり", mustHave: [ Inflection.Suffix.Tari ], mustNotHave: [ Inflection.Tense.Past ], },
+ { input: "早く", mustHave: [ Inflection.Adverb ], mustNotHave: [], },
+ { input: "遊んだり", mustHave: [ Inflection.Suffix.Tari ], mustNotHave: [ Inflection.Tense.Past ], },
+ { input: "聞け", mustHave: [ Inflection.Command ], mustNotHave: [], },
+ { input: "⾷べさせる", mustHave: [ Inflection.Causative ], mustNotHave: [], },
+ { input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], },
+ { input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], },
+ { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], },
+ { input: "泳げれば", mustHave: [ Inflection.Suffix.Ba ], mustNotHave: [], },
+ { input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], },
// TODO: りゃ for いることは
// TODO: じゃ for では
// TODO: なきゃ + なくちゃ
diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts
index 5a123ba..3faa6f8 100644
--- a/test/deinflection/test.ts
+++ b/test/deinflection/test.ts
@@ -1,8 +1,8 @@
-import DirectCoreClient from '../../core/direct/client.ts';
import cases from "./cases.ts";
import { core } from '../base.ts';
+import { TokenTag } from '../../language/tags.ts';
-cases.forEach(({ input, tags }) => {
+cases.forEach(({ input, mustHave, mustNotHave }) => {
Deno.test(`deinflection - ${input}`, async () => {
var { tokens } = await core.parseSentence(input);
@@ -14,9 +14,14 @@ cases.forEach(({ input, tags }) => {
if (!result)
throw new Error("No deconjugation found for input");
- for (var tag of tags)
+ let tag: TokenTag;
+ for (tag of mustHave)
if (!result.tags.includes(tag))
- throw new Error(`Deconjugation doesn't include tag ${tag}`);
+ throw new Error(`Deconjugation doesn't include required tag ${tag}`);
+
+ for (tag of mustNotHave)
+ if (result.tags.includes(tag))
+ throw new Error(`Deconjugation includes unallowed tag ${tag}`);
});
})