aboutsummaryrefslogtreecommitdiff
path: root/db/find.sql
blob: 1d312171f72bfbb34fe3d3f186ab820b115c76f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
-- this statement is prepared and run using :term and :user as inputs (see
-- db.ts or test/find)

-- this file is kind of messy because it needs to be one large query, instead
-- of separate phases creating temporary tables. queries with more than one
-- statement can't return results because of the way sqlite3 works.

-- TODO: add more comments in this file to explain what is going on

-- explain query plan -- testing only
with results(id, expression, reading, tags, depth, rules, original, deinflected) as (
	-- stripped deinflection table (remove some columns and duplicates)
	with deinflections(term, tags, depth, original, rules) as (
		-- recursively generated deinflection table
		with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
			-- input term all substrings until length 1
			with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
				select length(:term), :term, '', -1, 0, 0, 0
				union
				select
					inputs.length - 1,
					substr(inputs.term, 1, inputs.length - 1),
					inputs.tags,
					inputs.rules,
					inputs.rules_in,
					inputs.rules_out,
					inputs.depth
				from inputs
				where inputs.length > 1
			)
			select * from inputs
			union -- join all recursive rows into one large table
			select
				deinflect.length,
				substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
				deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
				deinflection.rules_out,
				deinflection.rules_in,
				deinflect.rules,
				deinflect.depth + 1
			from deinflect -- temp table
			inner join deinflection -- deinflection rules table
			on
				-- rules_in has to contain any of the current deconjugation rules
				(deinflect.rules & deinflection.rules_in != 0) and
				-- term.endsWith(kana_in)
				(substr(term, length(term) - length(kana_in) + 1) = kana_in) and
				-- can't deconjugate to length <1
				(length(term) > 0)
			limit 100 -- failsafe to catch any infinite loops
		)
		select term, tags, depth, substr(:term, 1, deinflect.length), rules
		from deinflect
	)
	select
		term.id,
		term.expression,
		term.reading,
		deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
		deinflections.depth,
		rules,
		deinflections.original,
		deinflections.term
	from deinflections
	inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
	left join term_tag on term_tag.term_id = term.id
	left join tag on term_tag.tag_id = tag.id
	group by term.id, deinflections.original, deinflections.rules
	having term.id is not null
)
select
	results.id,
	results.expression,
	results.reading,
	results.tags,
	group_concat(deinflection_rules.tag, ' ') as rules,
	results.depth,
	results.original,
	results.deinflected,
	root_overlay.sort as root_overlay,
	user_overlay.sort as user_overlay
from results
left join deinflection_rules
	on results.rules & deinflection_rules.mask != 0
left join sort_overlay
	as root_overlay
	on (root_overlay.expression = results.expression) and
	(root_overlay.reading = results.reading) and
	(root_overlay.user_id = 0)
left join sort_overlay
	as user_overlay
	on (user_overlay.expression = results.expression) and
	(user_overlay.reading = results.reading) and
	(user_overlay.user_id = (select id from user where username = :user))
group by results.id, results.original, results.rules;