initial public commit

author: lonkaars <loek@pipeframe.xyz> 2023-06-28 23:59:50 +0200
committer: lonkaars <loek@pipeframe.xyz> 2023-06-28 23:59:50 +0200
commit: 67dbb6421976254658c5e38045513129dd18187a (patch)
tree: 288b599d1097b26bdbcad3b6749b38e133017cf2 /db/dict/init.sql
1 files changed, 115 insertions, 0 deletions
diff --git a/db/dict/init.sql b/db/dict/init.sql
new file mode 100644
index 0000000..4e9fcc9
--- /dev/null
+++ b/db/dict/init.sql
@@ -0,0 +1,115 @@
+-- deinflection map (see dict/deinflections.sql)
+create table if not exists deinflection (
+	tag text not null,
+	kana_in text not null,
+	kana_out text not null,
+	rules_in int not null default 0,
+	rules_out int not null default 0
+);
+create index deinflection_kana_in on deinflection (kana_in);
+
+-- lookup table for deinflection rule bitmasks
+create table if not exists deinflection_rules (
+	mask int not null default 0,
+	tag text not null
+);
+
+-- dictionary metadata
+create table if not exists dict (
+	id integer primary key autoincrement,
+	tag text not null, -- TODO: dict.tag -> dict.tag_id
+	language text null default null,
+	priority int not null default 1
+);
+
+-- tag code<->id lookup
+create table if not exists tag (
+	id integer primary key autoincrement,
+	code text not null,
+	unique(code) on conflict ignore
+);
+create index tag_code on tag (code);
+
+
+-- definition of term (with single meaning)
+--
+-- terms that have multiple meanings should have multiple entries in the
+-- definition table with the same term_id. glossary contains an explanation or
+-- list of synonyms to illustrate the meaning of the term.
+create table if not exists definition (
+	id integer primary key autoincrement,
+	term_id int not null, -- reference to term (combined writing and reading)
+	sort int not null default 1, -- sort multiple meanings/glossaries within dictionary
+  -- search results for terms are grouped by term, then dictionary, then sort
+  -- within each dictionary
+	glossary text null default null, -- explanation or list of synonyms for term
+	dict_id int not null, -- origin dictionary of term
+	foreign key(term_id) references term(id),
+	foreign key(dict_id) references dict(id),
+	unique(glossary) on conflict ignore
+);
+
+-- human-readable tag label lookup table (used in UI and for translations)
+create table if not exists tag_label (
+	id integer primary key autoincrement,
+	tag_id int not null, -- reference to tag
+	language text not null default "en-US", -- label locale as RFC 5646 tag (e.g. ja)
+  -- label locale is used when displaying tags in a UI frontend. labels are
+  -- sorted based on langauge preference, with "en-US" being inserted last as a
+  -- fallback.
+	label_short text null default null, -- abbreviated label (e.g. ティアキン)
+	label text not null, -- full label (e.g. ティアーズ・オブ・ザ・キングダム)
+	description text null default null, -- short description (e.g. ゼルダの伝説シリーズより)
+	foreign key(tag_id) references tag(id)
+);
+
+-- standalone combination of kanji and reading
+--
+-- terms are added automatically when importing a dictionary, but are shared
+-- between dictionaries and thus not removed when disabling/removing a
+-- dictionary. terms are ranked outside the database, and the starting rank
+-- score can be changed using the user database (TODO)
+create table if not exists term (
+	id integer primary key autoincrement,
+	expression text not null, -- writing of term (e.g. 乗り越える)
+	reading text not null, -- reading of term (e.g. のりこえる)
+  alt int null default null, -- reference to main writing if this is is an alternate
+  -- alternate readings are stored as normal terms, but point to their main
+  -- reading. also used internally to store alternate readings of irregular
+  -- verbs example:
+  -- id  expression  reading  alt
+  -- 1   言葉        ことば   NULL
+  -- 2   詞          ことば   1
+  -- 3   辞          ことば   1
+  -- 4   来る        くる     NULL
+  -- 5   来た        きた     4
+  -- NOTE: alternate writings can technically be stored recursively this way,
+  -- but are only resolved one level deep TODO: resolve alternates
+  foreign key(alt) references term(id),
+	unique(expression, reading) on conflict ignore
+);
+create index term_expression on term (expression);
+create index term_reading on term (reading);
+-- TODO: (automatically) remove unused terms from db (using constraints?)
+
+
+-- allow many<->many relation between definition and tag
+create table if not exists definition_tag (
+	id integer primary key autoincrement,
+	definition_id int not null,
+	tag_id int not null,
+	foreign key(definition_id) references definition(id),
+	foreign key(tag_id) references tag(id),
+	unique(definition_id, tag_id) on conflict ignore
+);
+
+-- allow many<->many relation between term and tag
+create table if not exists term_tag (
+	id integer primary key autoincrement,
+	term_id int not null,
+	tag_id int not null,
+	foreign key(term_id) references term(id),
+	foreign key(tag_id) references tag(id),
+	unique(term_id, tag_id) on conflict ignore
+);
+
author	lonkaars <loek@pipeframe.xyz>	2023-06-28 23:59:50 +0200
committer	lonkaars <loek@pipeframe.xyz>	2023-06-28 23:59:50 +0200
commit	67dbb6421976254658c5e38045513129dd18187a (patch)
tree	288b599d1097b26bdbcad3b6749b38e133017cf2 /db/dict/init.sql