1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
-- this statement is prepared and run using :term and :user as inputs (see
-- db.ts or test/find)
-- this file is kind of messy because it needs to be one large query, instead
-- of separate phases creating temporary tables. queries with more than one
-- statement can't return results because of the way sqlite3 works.
-- TODO: add more comments in this file to explain what is going on
-- explain query plan -- testing only
with results(id, expression, reading, tags, depth, rules, original, deinflected) as (
-- stripped deinflection table (remove some columns and duplicates)
with deinflections(term, tags, depth, original, rules) as (
-- recursively generated deinflection table
with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
-- input term all substrings until length 1
with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
select length(:term), :term, '', -1, 0, 0, 0
union
select
inputs.length - 1,
substr(inputs.term, 1, inputs.length - 1),
inputs.tags,
inputs.rules,
inputs.rules_in,
inputs.rules_out,
inputs.depth
from inputs
where inputs.length > 1
)
select * from inputs
union -- join all recursive rows into one large table
select
deinflect.length,
substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
deinflection.rules_out,
deinflection.rules_in,
deinflect.rules,
deinflect.depth + 1
from deinflect -- temp table
inner join deinflection -- deinflection rules table
on
-- rules_in has to contain any of the current deconjugation rules
(deinflect.rules & deinflection.rules_in != 0) and
-- term.endsWith(kana_in)
(substr(term, length(term) - length(kana_in) + 1) = kana_in) and
-- can't deconjugate to length <1
(length(term) > 0)
limit 100 -- failsafe to catch any infinite loops
)
select term, tags, depth, substr(:term, 1, deinflect.length), rules
from deinflect
)
select
term.id,
term.expression,
term.reading,
deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
deinflections.depth,
rules,
deinflections.original,
deinflections.term
from deinflections
inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
inner join term_tag on term_tag.term_id = term.id
inner join tag on term_tag.tag_id = tag.id
group by term.id, deinflections.original, deinflections.rules
having term.id is not null
)
select
results.id,
results.expression,
results.reading,
results.tags,
group_concat(deinflection_rules.tag, ' ') as rules,
results.depth,
results.original,
results.deinflected,
root_overlay.sort as root_overlay,
user_overlay.sort as user_overlay
from results
left join deinflection_rules
on results.rules & deinflection_rules.mask != 0
left join sort_overlay
as root_overlay
on (root_overlay.expression = results.expression) and
(root_overlay.reading = results.reading) and
(root_overlay.user_id = 0)
left join sort_overlay
as user_overlay
on (user_overlay.expression = results.expression) and
(user_overlay.reading = results.reading) and
(user_overlay.user_id = (select id from user where username = :user))
group by results.id, results.original, results.rules;
|