aboutsummaryrefslogtreecommitdiff
path: root/db/find.sql
blob: e2d6ad8fb20e0337914160e5ded6611668537f2e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
-- this statement is prepared and run using :term and :user as inputs (see
-- db.ts or test/find)

-- this file is kind of messy because it needs to be one large query, instead
-- of separate phases creating temporary tables. queries with more than one
-- statement can't return results because of the way sqlite3 works.

-- TODO: add more comments in this file to explain what is going on

-- explain query plan -- testing only
with results(id, expression, reading, tags, depth, rules, original, deinflected) as (
  -- stripped deinflection table (remove some columns and duplicates)
  with deinflections(term, tags, depth, original, rules) as (
    -- recursively generated deinflection table
    with deinflect(length, term, tags, rules, rules_in, rules_out, depth) as (
      -- input term all substrings until length 1
      with inputs(length, term, tags, rules, rules_in, rules_out, depth) as (
        select length(:term), :term, '', -1, 0, 0, 0
        union
        select
          inputs.length - 1,
          substr(inputs.term, 1, inputs.length - 1),
          inputs.tags,
          inputs.rules,
          inputs.rules_in,
          inputs.rules_out,
          inputs.depth
        from inputs
        where inputs.length > 1
      )
      select * from inputs
      union -- join all recursive rows into one large table
      select
        deinflect.length,
        substr(deinflect.term, 1, length(deinflect.term)-length(deinflection.kana_in)) || deinflection.kana_out,
        deinflect.tags || ' ' || deinflection.tag, -- parsed to TokenTag[] on (sql) client-side
        deinflection.rules_out,
        deinflection.rules_in,
        deinflect.rules,
        deinflect.depth + 1
      from deinflect -- temp table
      inner join deinflection -- deinflection rules table
      on
        -- rules_in has to contain any of the current deconjugation rules
        (deinflect.rules & deinflection.rules_in != 0) and
        -- term.endsWith(kana_in)
        (substr(term, length(term) - length(kana_in) + 1) = kana_in) and
        -- can't deconjugate to length <1
        (length(term) > 0)
      limit 100 -- failsafe to catch any infinite loops
    )
    select term, tags, depth, substr(:term, 1, deinflect.length), rules
    from deinflect
  )
  select
    term.id,
    term.expression,
    term.reading,
    deinflections.tags || ' ' || group_concat(tag.code, ' ') as tags,
    deinflections.depth,
    rules,
    deinflections.original,
		deinflections.term
  from deinflections
  inner join term on (term.expression = deinflections.term) or (term.reading = deinflections.term)
  inner join term_tag on term_tag.term_id = term.id
  inner join tag on term_tag.tag_id = tag.id
  group by term.id, deinflections.original, deinflections.rules
  having term.id is not null
)
select
  results.id,
  results.expression,
  results.reading,
  results.tags,
  group_concat(deinflection_rules.tag, ' ') as rules,
  results.depth,
  results.original,
	results.deinflected,
	root_overlay.sort as root_overlay,
	user_overlay.sort as user_overlay
from results
left join deinflection_rules
	on results.rules & deinflection_rules.mask != 0
left join sort_overlay
	as root_overlay
	on (root_overlay.expression = results.expression) and
	(root_overlay.reading = results.reading) and
	(root_overlay.user_id = 0)
left join sort_overlay
	as user_overlay
	on (user_overlay.expression = results.expression) and
	(user_overlay.reading = results.reading) and
	(user_overlay.user_id = (select id from user where username = :user))
group by results.id, results.original, results.rules;