aboutsummaryrefslogtreecommitdiff
path: root/db/find.sql
blob: d678a159ae93e44150823fe840a10acbad1ff009 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
-- this statement is prepared and run using :term and :user as input_substrings (see
-- db.ts or test/find)

-- this file is kind of messy because it needs to be one large query, instead
-- of separate phases creating temporary tables. queries with more than one
-- statement can't return results because of the way sqlite3 works.
--
-- the innermost `with` clause contains step 1, the outermost contains step 3,
-- the last select being step 4. the steps are:
-- 1. Create all possible substrings from input `:term`
-- 2. Apply deconjugation rules recursively to `:term`
-- 3. Find terms matching deconjugations
-- 4. Clean up and add sort_overlay

-- STEP 3: match deconjugated strings to terms
with results as (
	-- STEP 2: apply definitions recursively
	with deinflections as (
		-- STEP 1: create all possible substrings from start of `:term`
		with input_substrings as (
			select -- first row of recursive query
				length(:term) as length, -- original term length
				:term         as string  -- original term
			union
			select -- recursive rows
				input_substrings.length - 1, -- length = length - 1
				substr(input_substrings.string, 1, input_substrings.length - 1) -- remove last character
			from input_substrings
			where input_substrings.length > 1 -- until only one character remains
		)
		select
			input_substrings.length as length,    -- input length
			input_substrings.string as string,    -- input substring
			input_substrings.string as original,  -- input substring (stays unmodified)
			''                      as tags,      -- space-separated deinflection tag list
			-1                      as rules,     -- bitmask rules (limits order of applied deinflection rules)
			0                       as rules_in,  -- allowed rules for current deinflection step
			0                       as rules_out, -- replacement rules for next deinflection step
			0                       as depth      -- amount of applied deconjugations (steps)
		from input_substrings
		union
		select
			deinflections.length, -- length (passthrough)
			substr(deinflections.string, 1, length(deinflections.string)-length(deinflection.kana_in)) || deinflection.kana_out, -- replace kana_in with kana_out
			deinflections.original, -- original (passthrough)
			deinflections.tags || ' ' || deinflection.tag, -- append deinflection reason tag(s)
			deinflection.rules_out, -- get next rules (pass rules_out to rules of next iteration)
			deinflection.rules_in, -- get rules_in
			deinflections.rules, -- rules (passthrough)
			deinflections.depth + 1 -- increment depth
		from deinflections -- recursive table
		inner join deinflection -- internal deinflection rules table
		on
			-- rules_in has to contain any of the current deconjugation rules
			(deinflections.rules & deinflection.rules_in != 0) and
			-- string.endsWith(kana_in)
			(substr(string, length(string) - length(kana_in) + 1) = kana_in) and
			-- can't deconjugate to length <1
			(length(string) > 0)
		limit 100 -- failsafe to catch any infinite loops
	)
	select
		term.id                                                  as id,         -- term group id
		term.expression                                          as expression, -- term writing (kanji)
		term.reading                                             as reading,    -- term reading (kana)
		deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,       -- deinflection reasons + dictionary tags
		deinflections.depth                                      as depth,      -- deinflection depth
		rules                                                    as rules,      -- deinflection rules (bitmask)
		deinflections.original                                   as original,   -- original input
		deinflections.string                                     as deinflected -- deinflected input (search term)
	from deinflections
	inner join term on (term.expression = deinflections.string) or (term.reading = deinflections.string)
	left join term_tag on term_tag.term_id = term.id
	left join tag on term_tag.tag_id = tag.id
	group by term.id, deinflections.original, deinflections.rules
	having term.id is not null
)
select -- STEP 4: clean up and join `sort_overlay`
	results.id,
	results.expression,
	results.reading,
	results.tags,
	group_concat(deinflection_rules.tag, ' ') as rules,
	results.depth,
	results.original,
	results.deinflected,
	root_overlay.sort as root_overlay,
	user_overlay.sort as user_overlay
from results
left join deinflection_rules
	on results.rules & deinflection_rules.mask != 0
left join sort_overlay
	as root_overlay
	on (root_overlay.expression = results.expression) and
	(root_overlay.reading = results.reading) and
	(root_overlay.user_id = 0)
left join sort_overlay
	as user_overlay
	on (user_overlay.expression = results.expression) and
	(user_overlay.reading = results.reading) and
	(user_overlay.user_id = (select id from user where username = :user))
group by results.id, results.original, results.rules;