1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
-- this statement is prepared and run using :term and :user as input_substrings (see
-- db.ts or test/find)
-- this file is kind of messy because it needs to be one large query, instead
-- of separate phases creating temporary tables. queries with more than one
-- statement can't return results because of the way sqlite3 works.
--
-- the innermost `with` clause contains step 1, the outermost contains step 3,
-- the last select being step 4. the steps are:
-- 1. Create all possible substrings from input `:term`
-- 2. Apply deconjugation rules recursively to `:term`
-- 3. Find terms matching deconjugations
-- 4. Clean up and add sort_overlay
-- STEP 3: match deconjugated strings to terms
with results as (
-- STEP 2: apply definitions recursively
with deinflections as (
-- STEP 1: create all possible substrings from start of `:term`
with input_substrings as (
select -- first row of recursive query
length(:term) as length, -- original term length
:term as string -- original term
union
select -- recursive rows
input_substrings.length - 1, -- length = length - 1
substr(input_substrings.string, 1, input_substrings.length - 1) -- remove last character
from input_substrings
where input_substrings.length > 1 -- until only one character remains
)
select
input_substrings.length as length, -- input length
input_substrings.string as string, -- input substring
input_substrings.string as original, -- input substring (stays unmodified)
'' as tags, -- space-separated deinflection tag list
-1 as rules, -- bitmask rules (limits order of applied deinflection rules)
0 as rules_in, -- allowed rules for current deinflection step
0 as rules_out, -- replacement rules for next deinflection step
0 as depth -- amount of applied deconjugations (steps)
from input_substrings
union
select
deinflections.length, -- length (passthrough)
substr(deinflections.string, 1, length(deinflections.string)-length(deinflection.kana_in)) || deinflection.kana_out, -- replace kana_in with kana_out
deinflections.original, -- original (passthrough)
deinflections.tags || ' ' || deinflection.tag, -- append deinflection reason tag(s)
deinflection.rules_out, -- get next rules (pass rules_out to rules of next iteration)
deinflection.rules_in, -- get rules_in
deinflections.rules, -- rules (passthrough)
deinflections.depth + 1 -- increment depth
from deinflections -- recursive table
inner join deinflection -- internal deinflection rules table
on
-- rules_in has to contain any of the current deconjugation rules
(deinflections.rules & deinflection.rules_in != 0) and
-- string.endsWith(kana_in)
(substr(string, length(string) - length(kana_in) + 1) = kana_in) and
-- can't deconjugate to length <1
(length(string) > 0)
limit 100 -- failsafe to catch any infinite loops
)
select
term.id as id, -- term group id
term.expression as expression, -- term writing (kanji)
term.reading as reading, -- term reading (kana)
deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags, -- deinflection reasons + dictionary tags
deinflections.depth as depth, -- deinflection depth
rules as rules, -- deinflection rules (bitmask)
deinflections.original as original, -- original input
deinflections.string as deinflected -- deinflected input (search term)
from deinflections
inner join term on (term.expression = deinflections.string) or (term.reading = deinflections.string)
left join term_tag on term_tag.term_id = term.id
left join tag on term_tag.tag_id = tag.id
group by term.id, deinflections.original, deinflections.rules
having term.id is not null
)
select -- STEP 4: clean up and join `sort_overlay`
results.id,
results.expression,
results.reading,
results.tags,
group_concat(deinflection_rules.tag, ' ') as rules,
results.depth,
results.original,
results.deinflected,
root_overlay.sort as root_overlay,
user_overlay.sort as user_overlay
from results
left join deinflection_rules
on results.rules & deinflection_rules.mask != 0
left join sort_overlay
as root_overlay
on (root_overlay.expression = results.expression) and
(root_overlay.reading = results.reading) and
(root_overlay.user_id = 0)
left join sort_overlay
as user_overlay
on (user_overlay.expression = results.expression) and
(user_overlay.reading = results.reading) and
(user_overlay.user_id = (select id from user where username = :user))
group by results.id, results.original, results.rules;
|