aboutsummaryrefslogtreecommitdiff
path: root/db
diff options
context:
space:
mode:
Diffstat (limited to 'db')
-rw-r--r--db/dict/deinflections.sql2
-rw-r--r--db/find.sql128
2 files changed, 65 insertions, 65 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index 0dbe19c..fd6ffc8 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -157,7 +157,7 @@ insert into deinflection_temp values
-- obligation <https://guidetojapanese.org/learn/grammar/must>
-- TODO: manually write these out instead of splitting particle and suffix
- ('infl:must infl:tmp:must:res infl:negative', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
+ ('infl:negative infl:must infl:tmp:must:res', 'だめ', '', 'a', 'ot'), -- built-in negative because だめ can't be deconjugated
('infl:must infl:tmp:must:res', 'いける', '', 'ru', 'ot'), -- はいけない -> positive (stored this way because obligatory could be in past)
('infl:must infl:tmp:must:res', 'なる', '', 'u', 'ot'), -- はならない -> positive
('infl:must', 'は', '', 'ot', 'nt'), -- removes particle (negative -te + は + だめ/いけない/ならない)
diff --git a/db/find.sql b/db/find.sql
index e2d6ad8..6c8a80e 100644
--- a/db/find.sql
+++ b/db/find.sql
@@ -9,73 +9,73 @@
-- explain query plan -- testing only
with results(id, expression, reading, tags, depth, rules, original, deinflected) as (
- -- stripped deinflection table (remove some columns and duplicates)
- with deinflections(term, tags, depth, original, rules) as (
- -- recursively generated deinflection table
- with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
- -- input term all substrings until length 1
- with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
- select length(:term), :term, '', -1, 0, 0, 0
- union
- select
- inputs.length - 1,
- substr(inputs.term, 1, inputs.length - 1),
- inputs.tags,
- inputs.rules,
- inputs.rules_in,
- inputs.rules_out,
- inputs.depth
- from inputs
- where inputs.length > 1
- )
- select * from inputs
- union -- join all recursive rows into one large table
- select
- deinflect.length,
- substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
- deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
- deinflection.rules_out,
- deinflection.rules_in,
- deinflect.rules,
- deinflect.depth + 1
- from deinflect -- temp table
- inner join deinflection -- deinflection rules table
- on
- -- rules_in has to contain any of the current deconjugation rules
- (deinflect.rules & deinflection.rules_in != 0) and
- -- term.endsWith(kana_in)
- (substr(term, length(term) - length(kana_in) + 1) = kana_in) and
- -- can't deconjugate to length <1
- (length(term) > 0)
- limit 100 -- failsafe to catch any infinite loops
- )
- select term, tags, depth, substr(:term, 1, deinflect.length), rules
- from deinflect
- )
- select
- term.id,
- term.expression,
- term.reading,
- deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
- deinflections.depth,
- rules,
- deinflections.original,
+ -- stripped deinflection table (remove some columns and duplicates)
+ with deinflections(term, tags, depth, original, rules) as (
+ -- recursively generated deinflection table
+ with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
+ -- input term all substrings until length 1
+ with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
+ select length(:term), :term, '', -1, 0, 0, 0
+ union
+ select
+ inputs.length - 1,
+ substr(inputs.term, 1, inputs.length - 1),
+ inputs.tags,
+ inputs.rules,
+ inputs.rules_in,
+ inputs.rules_out,
+ inputs.depth
+ from inputs
+ where inputs.length > 1
+ )
+ select * from inputs
+ union -- join all recursive rows into one large table
+ select
+ deinflect.length,
+ substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
+ deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
+ deinflection.rules_out,
+ deinflection.rules_in,
+ deinflect.rules,
+ deinflect.depth + 1
+ from deinflect -- temp table
+ inner join deinflection -- deinflection rules table
+ on
+ -- rules_in has to contain any of the current deconjugation rules
+ (deinflect.rules & deinflection.rules_in != 0) and
+ -- term.endsWith(kana_in)
+ (substr(term, length(term) - length(kana_in) + 1) = kana_in) and
+ -- can't deconjugate to length <1
+ (length(term) > 0)
+ limit 100 -- failsafe to catch any infinite loops
+ )
+ select term, tags, depth, substr(:term, 1, deinflect.length), rules
+ from deinflect
+ )
+ select
+ term.id,
+ term.expression,
+ term.reading,
+ deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
+ deinflections.depth,
+ rules,
+ deinflections.original,
deinflections.term
- from deinflections
- inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
- inner join term_tag on term_tag.term_id = term.id
- inner join tag on term_tag.tag_id = tag.id
- group by term.id, deinflections.original, deinflections.rules
- having term.id is not null
+ from deinflections
+ inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
+ inner join term_tag on term_tag.term_id = term.id
+ inner join tag on term_tag.tag_id = tag.id
+ group by term.id, deinflections.original, deinflections.rules
+ having term.id is not null
)
select
- results.id,
- results.expression,
- results.reading,
- results.tags,
- group_concat(deinflection_rules.tag, ' ') as rules,
- results.depth,
- results.original,
+ results.id,
+ results.expression,
+ results.reading,
+ results.tags,
+ group_concat(deinflection_rules.tag, ' ') as rules,
+ results.depth,
+ results.original,
results.deinflected,
root_overlay.sort as root_overlay,
user_overlay.sort as user_overlay