aboutsummaryrefslogtreecommitdiff
path: root/db/dict/template.sql
blob: 1a072527ada86979e59880b9e0dfc267c0f982a9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
-- create temporary ingest table
drop table if exists ingest;
-- TODO: ingest pitch-accent dictionaries
-- TODO: ingest alternate writings (space-separated)
create temporary table ingest(
	-- term fields
	expression text not null, -- kanji of term (e.g. 読み込む)
	reading text not null, -- reading of term (e.g. よみこむ)
	term_tags text not null default '', -- space-separated *term* tags, merged if term already exists in DB

	-- definition fields
	glossary_sort int not null default 1, -- order of multiple meanings (glossaries)
	glossary text null default null, -- glossary content (support for basic HTML markup/styling)
	glossary_tags text null default null -- add tags to single glossary entry
);

-- #DICTIONARY_CONTENT_BEGIN
-- this template is 'rendered' by pasting a .dict.sql file in between these
-- DICTIONARY_CONTENT markers. the makefile can render these using the
-- following m4 code (called using m4 -P template.sql < any.dict.sql):
--
-- m4_undivert(`/dev/stdin')
--
-- this breaks when the first line of the input file is not a comment or empty
-- line, so the makefile accounts for this by concatenating an empty line with
-- the dict first. the runtime typescript dictionary importer handles this by
-- not calling m4 for this.
-- #DICTIONARY_CONTENT_END

-- create dict id
insert into dict (tag, language) values ('dict:' || :dict, :lang);

-- add terms
insert into term (expression, reading)
select expression, reading
from ingest;

-- add definitions
insert into definition (term_id, sort, glossary, dict_id)
select
	term.id,
	ingest.glossary_sort,
	ingest.glossary,
	(select id from dict where tag = 'dict:' || :dict)
from ingest
join term on term.expression = ingest.expression and term.reading = ingest.reading;

-- create map of term_id and tag code
drop table if exists term_tag_map;
create temporary table term_tag_map (term_id, tag);
insert into term_tag_map
with tag_map(term_id, temp, tag) as (
	select
		(select id from term where expression is ingest.expression and reading is ingest.reading),
		term_tags || ' ',
		''
	from ingest
	union
	select
		term_id,
		substr(temp, instr(temp, ' ') + 1),
		substr(temp, 0, instr(temp, ' '))
	from tag_map
	where length(temp) > 1
)
select term_id, replace(tag, ' ', '')
from tag_map
where length(tag) > 0;

-- create map of definition_id and tag code
drop table if exists definition_tag_map;
create temporary table definition_tag_map (definition_id, tag);
insert into definition_tag_map
with tag_map(definition_id, temp, tag) as (
	select
		(select id from definition where glossary is ingest.glossary),
		glossary_tags || ' ',
		''
	from ingest
	union
	select
		definition_id,
		substr(temp, instr(temp, ' ') + 1),
		substr(temp, 0, instr(temp, ' '))
	from tag_map
	where length(temp) > 1
)
select definition_id, replace(tag, ' ', '')
from tag_map
where length(tag) > 0;

-- make sure tags exist
insert into tag (code)
select tag from term_tag_map
union
select tag from definition_tag_map;

-- add tags to terms
insert into term_tag (term_id, tag_id)
select
	term_id,
	tag.id
from term_tag_map
join tag on tag.code = term_tag_map.tag;

-- add tags to definitions
insert into definition_tag (definition_id, tag_id)
select
	definition_id,
	tag.id
from definition_tag_map
join tag on tag.code = definition_tag_map.tag;