From d36cefb50ddf67daa08a221d2de4d3eaae9e2492 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Sat, 8 Jul 2023 23:43:14 +0200 Subject: more deinflections --- db/dict/deinflections.sql | 38 ++++++++++++++++++++++++++++++-- db/find.sql | 6 ++--- language/tags.ts | 9 +++++++- readme.md | 2 +- test/deinflection/cases.ts | 55 ++++++++++++++++++++++++++++------------------ test/deinflection/test.ts | 13 +++++++---- 6 files changed, 91 insertions(+), 32 deletions(-) diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index ff177e2..a6070a8 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -15,7 +15,7 @@ insert into deinflection_temp values ('infl:negative', 'しない', 'する', 'a', 's'), ('infl:negative', 'こない', 'くる', 'a', 'k'), ('infl:negative', '来ない', '来る', 'a', 'k'), - ('infl:negative', 'ない', 'ある', 'a', 'ru'), -- this one may cause problems (?) + -- ('infl:negative', 'ない', 'ある', 'a', 'ru'), -- this one may cause problems (?) -- ('infl:negative', 'ない', '', 'a', 'ru'), -- this one may cause problems (?) -- past tense @@ -33,6 +33,7 @@ insert into deinflection_temp values ('infl:tense:past', 'きた', 'くる', 'a', 'k'), ('infl:tense:past', '来た', 'くる', 'a', 'k'), ('infl:tense:past', '行った', '行く', 'a', ''), + ('infl:tense:past', 'かった', 'い', 'a', 'a'), -- past negative -- adjective to adverb ('infl:adverb', 'く', 'い', 'a', 'i'), @@ -104,7 +105,8 @@ insert into deinflection_temp values ('infl:suffix:te', 'って', 'った', 'a', 'u'), ('infl:suffix:te', 'きて', 'きた', 'a', 'k'), ('infl:suffix:te', '来て', '来た', 'a', 'k'), - ('infl:suffix:te', 'くて', 'い', 'a', ''), -- TODO: rules_out of this one is i? + ('infl:suffix:te', 'くて', 'い', 'a', 'a'), + ('infl:suffix:te', 'よくて', 'いい', 'a', 'a'), -- exception -- -tari lists ('infl:suffix:tari', 'たり', 'た', 'a', 'ru'), @@ -120,6 +122,38 @@ insert into deinflection_temp values ('infl:suffix:sa class:noun', 'さ', '', 'a', 'na'), ('infl:suffix:sa class:noun', 'さ', 'い', 'a', 'i'), + -- continuous tense + ('infl:tense:cont', 'いる', '', 'a', 'a'), + + -- potential form + ('infl:potential', 'られる', 'る', 'a', 'ru'), + ('infl:potential', 'える', 'う', 'a', 'u'), + ('infl:potential', 'ける', 'く', 'a', 'u'), + ('infl:potential', 'げる', 'ぐ', 'a', 'u'), + ('infl:potential', 'せる', 'す', 'a', 'u'), + ('infl:potential', 'てる', 'つ', 'a', 'u'), + ('infl:potential', 'ねる', 'ぬ', 'a', 'u'), + ('infl:potential', 'べる', 'ぶ', 'a', 'u'), + ('infl:potential', 'める', 'む', 'a', 'u'), + ('infl:potential', 'れる', 'る', 'a', 'u'), + ('infl:potential', 'できる', 'する', 'a', 's'), + ('infl:potential', 'こられる', 'くる', 'a', 'k'), + ('infl:potential', 'ありうる', 'ある', 'a', ''), -- exception + ('infl:potential', 'ありえる', 'ある', 'a', ''), -- exception + + -- conditionals + ('infl:suffix:ba', 'えば', 'う', 'a', 'u'), + ('infl:suffix:ba', 'けば', 'く', 'a', 'u'), + ('infl:suffix:ba', 'げば', 'ぐ', 'a', 'u'), + ('infl:suffix:ba', 'せば', 'す', 'a', 'u'), + ('infl:suffix:ba', 'てば', 'つ', 'a', 'u'), + ('infl:suffix:ba', 'ねば', 'ぬ', 'a', 'u'), + ('infl:suffix:ba', 'べば', 'ぶ', 'a', 'u'), + ('infl:suffix:ba', 'めば', 'む', 'a', 'u'), + ('infl:suffix:ba', 'れば', 'る', 'a', 'u ru'), + ('infl:suffix:ba', 'ければ', 'い', 'a', 'a'), + -- TODO: 〜であれば (deconjugates to です i think?) + -- auxiliary rules ('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem diff --git a/db/find.sql b/db/find.sql index cdaebb3..dd6a011 100644 --- a/db/find.sql +++ b/db/find.sql @@ -49,7 +49,7 @@ with results(id, expression, reading, tags, depth, rules, original, deinflected) (length(term) > 0) limit 50 -- failsafe to catch any infinite loops ) - select term, tags, depth, substr(:term, 1, deinflect.length), rules_out + select term, tags, depth, substr(:term, 1, deinflect.length), rules from deinflect ) select @@ -65,7 +65,7 @@ with results(id, expression, reading, tags, depth, rules, original, deinflected) inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term) inner join term_tag on term_tag.term_id = term.id inner join tag on term_tag.tag_id = tag.id - group by term.id, deinflections.original + group by term.id, deinflections.original, deinflections.rules having term.id is not null ) select @@ -92,5 +92,5 @@ left join sort_overlay on (user_overlay.expression = results.expression) and (user_overlay.reading = results.reading) and (user_overlay.user_id = (select id from user where username = :user)) -group by results.id, results.original; +group by results.id, results.original, results.rules; diff --git a/language/tags.ts b/language/tags.ts index 7f5757f..a9fc5ca 100644 --- a/language/tags.ts +++ b/language/tags.ts @@ -73,6 +73,8 @@ export const Tag = { Tense: { /** @constant past tense (e.g. 叩いた) */ Past: "infl:tense:past", + /** @constant continuous tense (e.g. 喋っている) */ + Continuous: "infl:tense:cont", }, /** @constant adverbs (e.g. 早く) */ Adverb: "infl:adverb", @@ -87,6 +89,8 @@ export const Tag = { Te: "infl:suffix:te", /** @constant -tari ending (e.g. 遊んだり) */ Tari: "infl:suffix:tari", + /** @constant -ba ending for conditionals (e.g. 泳げれば)*/ + Ba: "infl:suffix:ba", }, /** @constant internal deinflection rules */ Reason: { @@ -137,9 +141,12 @@ export function parseTags(input: string) { var tags = input.replaceAll(/ +/g, " ").trim().split(" ") as TokenTag[]; var filteredTags: TokenTag[] = []; for (var tag of tags) { - // skip past tense tags after -te and -tari deinflection + // skip past tense tag if used as step for -te and -tari inflection if (tag == Tag.Inflection.Tense.Past && filteredTags.anyOf([Tag.Inflection.Suffix.Te, Tag.Inflection.Suffix.Tari])) continue; + // skip -te suffix tag if it's a base for continuous tense + if (tag == Tag.Inflection.Suffix.Te && + filteredTags.anyOf([Tag.Inflection.Tense.Continuous])) continue; filteredTags.push(tag); } diff --git a/readme.md b/readme.md index 2356e56..60f1eda 100644 --- a/readme.md +++ b/readme.md @@ -24,7 +24,7 @@ scope is larger than Yomichan, it's still focused on Japanese only.** - [ ] add separate kanji readings/info table - [ ] add separate frequency dictionary - [ ] add more deinflections to db/deinflections.sql -- [ ] set up unit tests for sentence reading generation +- [x] set up unit tests for sentence reading generation - [x] port server-internal API to simple HTTP JSON API - [ ] create primitive search page ui - [ ] add code formatter config diff --git a/test/deinflection/cases.ts b/test/deinflection/cases.ts index 4bff5e3..e0b2137 100644 --- a/test/deinflection/cases.ts +++ b/test/deinflection/cases.ts @@ -3,30 +3,43 @@ const { Inflection } = Tag; interface Test { input: string; - tags: TokenTags; + mustHave: TokenTags; + mustNotHave: TokenTags; }; export default [ - { input: "取る", tags: [], }, - { input: "取らない", tags: [ Inflection.Negative ], }, - { input: "取ります", tags: [ Inflection.Polite.Masu ], }, - { input: "取りません", tags: [ Inflection.Negative, Inflection.Polite.Masu ], }, - { input: "取った", tags: [ Inflection.Tense.Past ], }, - { input: "取らなかった", tags: [ Inflection.Negative, Inflection.Tense.Past ], }, - { input: "取りました", tags: [ Inflection.Polite.Masu, Inflection.Tense.Past ], }, - { input: "取りませんでした", tags: [ Inflection.Negative, Inflection.Polite.Masu, Inflection.Tense.Past ], }, - { input: "取って", tags: [ Inflection.Suffix.Te ], }, - { input: "取らなくて", tags: [ Inflection.Negative, Inflection.Suffix.Te ], }, - { input: "取れる", tags: [ Inflection.Potential ], }, - { input: "取れない", tags: [ Inflection.Negative, Inflection.Potential ], }, - { input: "取られる", tags: [ Inflection.Passive ], }, - { input: "取られない", tags: [ Inflection.Negative, Inflection.Passive ], }, - { input: "取らせる", tags: [ Inflection.Causative ], }, - { input: "取らせない", tags: [ Inflection.Negative, Inflection.Causative ], }, - { input: "取らせられる", tags: [ Inflection.Causative, Inflection.Passive ], }, - { input: "取らせられない", tags: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], }, - { input: "取れ", tags: [ Inflection.Command ], }, - { input: "取るな", tags: [ Inflection.Negative, Inflection.Command ], }, + // jisho.org generated conjugations for 取る (u-verb) + { input: "取る", mustHave: [], mustNotHave: [], }, + { input: "取らない", mustHave: [ Inflection.Negative ], mustNotHave: [], }, + { input: "取ります", mustHave: [ Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "取りません", mustHave: [ Inflection.Negative, Inflection.Polite.Masu ], mustNotHave: [], }, + { input: "取った", mustHave: [ Inflection.Tense.Past ], mustNotHave: [], }, + { input: "取らなかった", mustHave: [ Inflection.Negative, Inflection.Tense.Past ], mustNotHave: [], }, + { input: "取りました", mustHave: [ Inflection.Polite.Masu, Inflection.Tense.Past ], mustNotHave: [], }, + { input: "取りませんでした", mustHave: [ Inflection.Negative, Inflection.Polite.Masu, Inflection.Tense.Past ], mustNotHave: [], }, + { input: "取って", mustHave: [ Inflection.Suffix.Te ], mustNotHave: [], }, + { input: "取らなくて", mustHave: [ Inflection.Negative, Inflection.Suffix.Te ], mustNotHave: [], }, + { input: "取れる", mustHave: [ Inflection.Potential ], mustNotHave: [], }, + { input: "取れない", mustHave: [ Inflection.Negative, Inflection.Potential ], mustNotHave: [], }, + { input: "取られる", mustHave: [ Inflection.Passive ], mustNotHave: [], }, + { input: "取られない", mustHave: [ Inflection.Negative, Inflection.Passive ], mustNotHave: [], }, + { input: "取らせる", mustHave: [ Inflection.Causative ], mustNotHave: [], }, + { input: "取らせない", mustHave: [ Inflection.Negative, Inflection.Causative ], mustNotHave: [], }, + { input: "取らせられる", mustHave: [ Inflection.Causative, Inflection.Passive ], mustNotHave: [], }, + { input: "取らせられない", mustHave: [ Inflection.Negative, Inflection.Causative, Inflection.Passive ], mustNotHave: [], }, + { input: "取れ", mustHave: [ Inflection.Command ], mustNotHave: [], }, + { input: "取るな", mustHave: [ Inflection.Negative, Inflection.Command ], mustNotHave: [], }, + // other tests + { input: "取ったり", mustHave: [ Inflection.Suffix.Tari ], mustNotHave: [ Inflection.Tense.Past ], }, + { input: "早く", mustHave: [ Inflection.Adverb ], mustNotHave: [], }, + { input: "遊んだり", mustHave: [ Inflection.Suffix.Tari ], mustNotHave: [ Inflection.Tense.Past ], }, + { input: "聞け", mustHave: [ Inflection.Command ], mustNotHave: [], }, + { input: "⾷べさせる", mustHave: [ Inflection.Causative ], mustNotHave: [], }, + { input: "落ちられる", mustHave: [ Inflection.Potential ], mustNotHave: [], }, + { input: "言われる", mustHave: [ Inflection.Passive ], mustNotHave: [], }, + { input: "喋っている", mustHave: [ Inflection.Tense.Continuous ], mustNotHave: [ Inflection.Suffix.Te ], }, + { input: "泳げれば", mustHave: [ Inflection.Suffix.Ba ], mustNotHave: [], }, + { input: "取らなければ", mustHave: [ Inflection.Potential, Inflection.Negative ], mustNotHave: [], }, // TODO: りゃ for いることは // TODO: じゃ for では // TODO: なきゃ + なくちゃ diff --git a/test/deinflection/test.ts b/test/deinflection/test.ts index 5a123ba..3faa6f8 100644 --- a/test/deinflection/test.ts +++ b/test/deinflection/test.ts @@ -1,8 +1,8 @@ -import DirectCoreClient from '../../core/direct/client.ts'; import cases from "./cases.ts"; import { core } from '../base.ts'; +import { TokenTag } from '../../language/tags.ts'; -cases.forEach(({ input, tags }) => { +cases.forEach(({ input, mustHave, mustNotHave }) => { Deno.test(`deinflection - ${input}`, async () => { var { tokens } = await core.parseSentence(input); @@ -14,9 +14,14 @@ cases.forEach(({ input, tags }) => { if (!result) throw new Error("No deconjugation found for input"); - for (var tag of tags) + let tag: TokenTag; + for (tag of mustHave) if (!result.tags.includes(tag)) - throw new Error(`Deconjugation doesn't include tag ${tag}`); + throw new Error(`Deconjugation doesn't include required tag ${tag}`); + + for (tag of mustNotHave) + if (result.tags.includes(tag)) + throw new Error(`Deconjugation includes unallowed tag ${tag}`); }); }) -- cgit v1.2.3