diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-10 18:58:15 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-10 18:58:15 +0200 |
commit | 65e7b2260d728a9c747d126f828e90ae34f05b40 (patch) | |
tree | 80bce68628fc763b8d24b97a089d79ef5c0d039c /db | |
parent | f7bfb89d7f400b48539b6f0712040caa6c6d3165 (diff) |
constrain deinflection test cases more
Diffstat (limited to 'db')
-rw-r--r-- | db/dict/deinflections.sql | 2 | ||||
-rw-r--r-- | db/find.sql | 128 |
2 files changed, 65 insertions, 65 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql index 0dbe19c..fd6ffc8 100644 --- a/db/dict/deinflections.sql +++ b/db/dict/deinflections.sql @@ -157,7 +157,7 @@ insert into deinflection_temp values -- obligation <https://guidetojapanese.org/learn/grammar/must> -- TODO: manually write these out instead of splitting particle and suffix - ('infl:must infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated + ('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated ('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past) ('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive ('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ/いけない/ならない) diff --git a/db/find.sql b/db/find.sql index e2d6ad8..6c8a80e 100644 --- a/db/find.sql +++ b/db/find.sql @@ -9,73 +9,73 @@ -- explain query plan -- testing only with results(id, expression, reading, tags, depth, rules, original, deinflected) as ( - -- stripped deinflection table (remove some columns and duplicates) - with deinflections(term, tags, depth, original, rules) as ( - -- recursively generated deinflection table - with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as ( - -- input term all substrings until length 1 - with inputs(length, term, tags, rules, rules_in, rules_out, depth) as ( - select length(:term), :term, '', -1, 0, 0, 0 - union - select - inputs.length - 1, - substr(inputs.term, 1, inputs.length - 1), - inputs.tags, - inputs.rules, - inputs.rules_in, - inputs.rules_out, - inputs.depth - from inputs - where inputs.length > 1 - ) - select * from inputs - union -- join all recursive rows into one large table - select - deinflect.length, - substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out, - deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side - deinflection.rules_out, - deinflection.rules_in, - deinflect.rules, - deinflect.depth + 1 - from deinflect -- temp table - inner join deinflection -- deinflection rules table - on - -- rules_in has to contain any of the current deconjugation rules - (deinflect.rules & deinflection.rules_in != 0) and - -- term.endsWith(kana_in) - (substr(term, length(term) - length(kana_in) + 1) = kana_in) and - -- can't deconjugate to length <1 - (length(term) > 0) - limit 100 -- failsafe to catch any infinite loops - ) - select term, tags, depth, substr(:term, 1, deinflect.length), rules - from deinflect - ) - select - term.id, - term.expression, - term.reading, - deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags, - deinflections.depth, - rules, - deinflections.original, + -- stripped deinflection table (remove some columns and duplicates) + with deinflections(term, tags, depth, original, rules) as ( + -- recursively generated deinflection table + with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as ( + -- input term all substrings until length 1 + with inputs(length, term, tags, rules, rules_in, rules_out, depth) as ( + select length(:term), :term, '', -1, 0, 0, 0 + union + select + inputs.length - 1, + substr(inputs.term, 1, inputs.length - 1), + inputs.tags, + inputs.rules, + inputs.rules_in, + inputs.rules_out, + inputs.depth + from inputs + where inputs.length > 1 + ) + select * from inputs + union -- join all recursive rows into one large table + select + deinflect.length, + substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out, + deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side + deinflection.rules_out, + deinflection.rules_in, + deinflect.rules, + deinflect.depth + 1 + from deinflect -- temp table + inner join deinflection -- deinflection rules table + on + -- rules_in has to contain any of the current deconjugation rules + (deinflect.rules & deinflection.rules_in != 0) and + -- term.endsWith(kana_in) + (substr(term, length(term) - length(kana_in) + 1) = kana_in) and + -- can't deconjugate to length <1 + (length(term) > 0) + limit 100 -- failsafe to catch any infinite loops + ) + select term, tags, depth, substr(:term, 1, deinflect.length), rules + from deinflect + ) + select + term.id, + term.expression, + term.reading, + deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags, + deinflections.depth, + rules, + deinflections.original, deinflections.term - from deinflections - inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term) - inner join term_tag on term_tag.term_id = term.id - inner join tag on term_tag.tag_id = tag.id - group by term.id, deinflections.original, deinflections.rules - having term.id is not null + from deinflections + inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term) + inner join term_tag on term_tag.term_id = term.id + inner join tag on term_tag.tag_id = tag.id + group by term.id, deinflections.original, deinflections.rules + having term.id is not null ) select - results.id, - results.expression, - results.reading, - results.tags, - group_concat(deinflection_rules.tag, ' ') as rules, - results.depth, - results.original, + results.id, + results.expression, + results.reading, + results.tags, + group_concat(deinflection_rules.tag, ' ') as rules, + results.depth, + results.original, results.deinflected, root_overlay.sort as root_overlay, user_overlay.sort as user_overlay |