aboutsummaryrefslogtreecommitdiff
path: root/db
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-06-29 11:33:23 +0200
committerlonkaars <loek@pipeframe.xyz>2023-06-29 11:33:23 +0200
commitc998e1c0477d51c886f9e4246e102dec4d7ef8dd (patch)
tree4d979c57f16b138ff4b2ce5fb3151ce241af6881 /db
parent67dbb6421976254658c5e38045513129dd18187a (diff)
add jmdict importer to repo
Diffstat (limited to 'db')
-rw-r--r--db/dict/template.sql (renamed from db/dict/template.sql.m4)23
-rw-r--r--db/makefile14
2 files changed, 26 insertions, 11 deletions
diff --git a/db/dict/template.sql.m4 b/db/dict/template.sql
index 00de413..1a07252 100644
--- a/db/dict/template.sql.m4
+++ b/db/dict/template.sql
@@ -14,9 +14,18 @@ create temporary table ingest(
glossary_tags text null default null -- add tags to single glossary entry
);
-include(`/dev/stdin')dnl --'
--- the apostrophe is so my editor highlighting keeps working if I force the
--- filetype to sql instead of m4
+-- #DICTIONARY_CONTENT_BEGIN
+-- this template is 'rendered' by pasting a .dict.sql file in between these
+-- DICTIONARY_CONTENT markers. the makefile can render these using the
+-- following m4 code (called using m4 -P template.sql < any.dict.sql):
+--
+-- m4_undivert(`/dev/stdin')
+--
+-- this breaks when the first line of the input file is not a comment or empty
+-- line, so the makefile accounts for this by prepending an empty line to the
+-- dict before piping it into m4. the runtime typescript dictionary importer
+-- sidesteps this by not using m4 to render the template.
+-- #DICTIONARY_CONTENT_END
-- create dict id
insert into dict (tag, language) values ('dict:' || :dict, :lang);
@@ -49,8 +58,8 @@ with tag_map(term_id, temp, tag) as (
union
select
term_id,
- `substr'(temp, instr(temp, ' ') + 1),
- `substr'(temp, 0, instr(temp, ' '))
+ substr(temp, instr(temp, ' ') + 1),
+ substr(temp, 0, instr(temp, ' '))
from tag_map
where length(temp) > 1
)
@@ -71,8 +80,8 @@ with tag_map(definition_id, temp, tag) as (
union
select
definition_id,
- `substr'(temp, instr(temp, ' ') + 1),
- `substr'(temp, 0, instr(temp, ' '))
+ substr(temp, instr(temp, ' ') + 1),
+ substr(temp, 0, instr(temp, ' '))
from tag_map
where length(temp) > 1
)
diff --git a/db/makefile b/db/makefile
index c1e527e..88d4bba 100644
--- a/db/makefile
+++ b/db/makefile
@@ -2,7 +2,13 @@ SQL = sqlite3
DICT_DB = dict.db
USER_DB = user.db
-DICT_TEMPLATE = dict/template.sql.m4
+DICT_TEMPLATE = dict/template.sql
+
+# comment out any of these lines to exclude that dictionary from the default DB
+DEFAULT_DICTS += dict/test_a.sql
+DEFAULT_DICTS += dict/test_b.sql
+DEFAULT_DICTS += dict/test_pitch_accent.sql
+DEFAULT_DICTS += ../import/jmdict/jmdict.sql
.PHONY: clean test
@@ -20,7 +26,7 @@ dict/base.sql: dict/reset.sql dict/init.sql dict/deinflections.sql dict/tags.sql
dict/full.sql: dict/base.sql dict/dict.sql
cat $^ > $@
-dict/dict.sql: dict/test_a.sql dict/test_b.sql dict/test_pitch_accent.sql dict/jmdict.sql
+dict/dict.sql: $(DEFAULT_DICTS)
cat $^ > $@
user/base.sql: user/reset.sql user/init.sql
@@ -30,11 +36,11 @@ user/full.sql: user/base.sql user/root.sql
cat $^ > $@
%.sql: %.dict.sql $(DICT_TEMPLATE)
- m4 $(DICT_TEMPLATE) < $< > $@
+ echo "" | cat - $< | m4 -P $(DICT_TEMPLATE) > $@
# delete generated sql files and database
clean:
- $(RM) $(DICT_DB) $(USER_DB) dict/base.sql dict/full.sql dict/dict.sql dict/test_a.sql dict/test_b.sql dict/test_pitch_accent.sql user/base.sql user/full.sql
+ $(RM) $(DICT_DB) $(USER_DB) dict/base.sql dict/full.sql dict/dict.sql $(DEFAULT_DICTS) user/base.sql user/full.sql
test: $(DICT_DB) find.sql
./test/find '浮上しました'