aboutsummaryrefslogtreecommitdiff
path: root/db/dict/template.sql
blob: 6b17a0cfb751d5d6a02d193c130f677ac7fe2db2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
-- create temporary ingest table
drop table if exists ingest;
create temporary table ingest(
	-- term fields
	expression text not null, -- kanji of term (e.g. 読み込む)
	reading text not null, -- reading of term (e.g. よみこむ)
	term_tags text not null default '', -- space-separated *term* tags, merged if term already exists in DB

	-- definition fields
	glossary_sort int not null default 1, -- order of multiple meanings (glossaries)
	glossary text null default null, -- glossary content (support for basic HTML markup/styling)
	glossary_tags text null default null -- add tags to single glossary entry
);
-- TODO: ingest pitch-accent dictionaries

-- create temporary alternate readings table
drop table if exists alts;
create temporary table alts(
	expression text not null, -- kanji of alternate version
	reading text not null, -- reading of alternate version
	normal_expression text not null, -- kanji of parent (original)
	normal_reading text not null -- reading of parent (original)
);

-- #DICTIONARY_CONTENT_BEGIN
-- this template is 'rendered' by pasting a .dict.sql file in between these
-- DICTIONARY_CONTENT markers. the makefile can render these using the
-- following m4 code (called using m4 -P template.sql < any.dict.sql):
--
-- m4_undivert(`/dev/stdin')
--
-- this breaks when the first line of the input file is not a comment or empty
-- line, so the makefile accounts for this by concatenating an empty line with
-- the dict first. the runtime typescript dictionary importer handles this by
-- not calling m4 for this.
-- #DICTIONARY_CONTENT_END

-- create dict id
insert into dict (tag, language) values ('dict:' || :dict, :lang);

-- add terms
insert into term (expression, reading)
select expression, reading
from ingest;

-- add alternates
insert into term (expression, reading, alt)
select expression, reading, (select id from term where expression = normal_expression and reading = normal_reading)
from alts;

-- add definitions
insert into definition (term_id, sort, glossary, dict_id)
select
	term.id,
	ingest.glossary_sort,
	ingest.glossary,
	(select id from dict where tag = 'dict:' || :dict)
from ingest
join term on term.expression = ingest.expression and term.reading = ingest.reading;

-- create map of term_id and tag code
drop table if exists term_tag_map;
create temporary table term_tag_map (term_id, tag);
insert into term_tag_map
with tag_map(term_id, temp, tag) as (
	select
		(select id from term where expression is ingest.expression and reading is ingest.reading),
		term_tags || ' ',
		''
	from ingest
	union
	select
		term_id,
		substr(temp, instr(temp, ' ') + 1),
		substr(temp, 0, instr(temp, ' '))
	from tag_map
	where length(temp) > 1
)
select term_id, replace(tag, ' ', '')
from tag_map
where length(tag) > 0;

-- create map of definition_id and tag code
drop table if exists definition_tag_map;
create temporary table definition_tag_map (definition_id, tag);
insert into definition_tag_map
with tag_map(definition_id, temp, tag) as (
	select
		(select id from definition where glossary is ingest.glossary),
		glossary_tags || ' ',
		''
	from ingest
	union
	select
		definition_id,
		substr(temp, instr(temp, ' ') + 1),
		substr(temp, 0, instr(temp, ' '))
	from tag_map
	where length(temp) > 1
)
select definition_id, replace(tag, ' ', '')
from tag_map
where length(tag) > 0;

-- make sure tags exist
insert into tag (code)
select tag from term_tag_map
union
select tag from definition_tag_map;

-- add tags to terms
insert into term_tag (term_id, tag_id)
select
	term_id,
	tag.id
from term_tag_map
join tag on tag.code = term_tag_map.tag;

-- add tags to alternates
insert into term_tag (term_id, tag_id)
select term_alt.id, term_tag.tag_id
from alts
inner join term as term_normal on term_normal.expression = alts.normal_expression and term_normal.reading = alts.normal_reading
inner join term as term_alt on term_alt.expression = alts.expression and term_alt.reading = alts.reading
left join term_tag on term_tag.term_id = term_normal.id
where term_tag.tag_id is not null;

-- add tags to definitions
insert into definition_tag (definition_id, tag_id)
select
	definition_id,
	tag.id
from definition_tag_map
join tag on tag.code = definition_tag_map.tag;