aboutsummaryrefslogtreecommitdiff
path: root/db/dict/init.sql
blob: 3c6dc5096e6d8cdc222d796a5a2fed62bd220326 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
-- deinflection map (see dict/deinflections.sql)
create table if not exists deinflection (
	tag text not null,
	kana_in text not null,
	kana_out text not null,
	rules_in int not null default 0,
	rules_out int not null default 0
);
create index deinflection_kana_in on deinflection (kana_in);

-- lookup table for deinflection rule bitmasks
create table if not exists deinflection_rules (
	mask int not null default 0,
	tag text not null
);

-- dictionary metadata
create table if not exists dict (
	id integer primary key autoincrement,
	tag text not null, -- TODO: dict.tag -> dict.tag_id
	language text null default null,
	priority int not null default 1
);

-- tag code<->id lookup
create table if not exists tag (
	id integer primary key autoincrement,
	code text not null,
	unique(code) on conflict ignore
);
create index tag_code on tag (code);


-- definition of term (with single meaning)
--
-- terms that have multiple meanings should have multiple entries in the
-- definition table with the same term_id. glossary contains an explanation or
-- list of synonyms to illustrate the meaning of the term.
create table if not exists definition (
	id integer primary key autoincrement,
	term_id int not null, -- reference to term (combined writing and reading)
	sort int not null default 1, -- sort multiple meanings/glossaries within dictionary
  -- search results for terms are grouped by term, then dictionary, then sort
  -- within each dictionary
	glossary text null default null, -- explanation or list of synonyms for term
	dict_id int not null, -- origin dictionary of term
	foreign key(term_id) references term(id),
	foreign key(dict_id) references dict(id),
	unique(glossary) on conflict ignore
);

-- human-readable tag label lookup table (used in UI and for translations)
create table if not exists tag_label (
	id integer primary key autoincrement,
	tag_id int not null, -- reference to tag
	language text not null default "en-US", -- label locale as RFC 5646 tag (e.g. ja)
  -- label locale is used when displaying tags in a UI frontend. labels are
  -- sorted based on langauge preference, with "en-US" being inserted last as a
  -- fallback.
	label_short text null default null, -- abbreviated label (e.g. ティアキン)
	label text not null, -- full label (e.g. ティアーズオブキングダム)
	description text null default null, -- short description (e.g. ゼルダの伝説シリーズより)
	foreign key(tag_id) references tag(id)
);

-- standalone combination of kanji and reading
--
-- terms are added automatically when importing a dictionary, but are shared
-- between dictionaries and thus not removed when disabling/removing a
-- dictionary. terms are ranked outside the database, and the starting rank
-- score can be changed using the user database (TODO)
create table if not exists term (
	id integer primary key autoincrement,
	expression text not null, -- writing of term (e.g. 乗り越える)
	reading text not null, -- reading of term (e.g. のりこえる)
  alt int null default null, -- reference to main writing if this is is an alternate
  -- alternate readings are stored as normal terms, but point to their main
  -- reading. also used internally to store alternate readings of irregular
  -- verbs example:
  -- id  expression  reading  alt
  -- 1   言葉        ことば   NULL
  -- 2             ことば   1
  -- 3             ことば   1
  -- 4   来る        くる     NULL
  -- 5   来た        きた     4
  -- NOTE: alternate writings can technically be stored recursively this way,
  -- but are only resolved one level deep TODO: resolve alternates
  foreign key(alt) references term(id),
	unique(expression, reading) on conflict ignore
);
create index term_expression on term (expression);
create index term_reading on term (reading);
-- TODO: (automatically) remove unused terms from db (using constraints?)

-- allow many<->many relation between definition and tag
-- TODO: remove this table!
create table if not exists definition_tag (
	id integer primary key autoincrement,
	definition_id int not null,
	tag_id int not null,
	foreign key(definition_id) references definition(id),
	foreign key(tag_id) references tag(id),
	unique(definition_id, tag_id) on conflict ignore
);

-- allow many<->many relation between term and tag
create table if not exists term_tag (
	id integer primary key autoincrement,
	term_id int not null,
	tag_id int not null,
	foreign key(term_id) references term(id),
	foreign key(tag_id) references tag(id),
	unique(term_id, tag_id) on conflict ignore
);