summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author Darius Jahandarie <djahandarie@gmail.com> 2023-11-03 23:32:33 +0900
committer Darius Jahandarie <djahandarie@gmail.com> 2023-11-03 23:52:30 +0900
commit 376151096431d4362e4baaacf0cef4a534e169f7 (patch)
tree 18536a224fd4d833aaf691d0363a3a4b59175dd7 /ext
parent b64f51c3b13a46af4dd7f1e43048ac19c781ca7b (diff)
Replace JsonSchema with ajv for dictionary validation
Diffstat (limited to 'ext')
-rw-r--r-- ext/data/schemas/custom-audio-list-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-index-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-kanji-bank-v1-schema.json 3
-rw-r--r-- ext/data/schemas/dictionary-kanji-bank-v3-schema.json 3
-rw-r--r-- ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-tag-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-term-bank-v1-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-term-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-term-meta-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/options-schema.json 1
-rw-r--r-- ext/js/language/dictionary-importer.js 99
-rw-r--r-- ext/lib/ucs2length.js 16
12 files changed, 53 insertions, 76 deletions
diff --git a/ext/data/schemas/custom-audio-list-schema.json b/ext/data/schemas/custom-audio-list-schema.json
index 2cb3ca78..885ad087 100644
--- a/ext/data/schemas/custom-audio-list-schema.json
+++ b/ext/data/schemas/custom-audio-list-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "customAudioList",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
diff --git a/ext/data/schemas/dictionary-index-schema.json b/ext/data/schemas/dictionary-index-schema.json
index a8ca0f23..98b27143 100644
--- a/ext/data/schemas/dictionary-index-schema.json
+++ b/ext/data/schemas/dictionary-index-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryIndex",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"description": "Index file containing information about the data contained in the dictionary.",
diff --git a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json
index 5aca2d6a..d506a19d 100644
--- a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json
+++ b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryKanjiBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
@@ -30,4 +31,4 @@
"description": "A meaning for the kanji character."
}
}
-}
\ No newline at end of file
+}
diff --git a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json
index ee508294..763ce3b1 100644
--- a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryKanjiBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
@@ -42,4 +43,4 @@
}
]
}
-}
\ No newline at end of file
+}
diff --git a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
index e478de93..d8f5031b 100644
--- a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryKanjiMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
diff --git a/ext/data/schemas/dictionary-tag-bank-v3-schema.json b/ext/data/schemas/dictionary-tag-bank-v3-schema.json
index f7721119..ab6e3377 100644
--- a/ext/data/schemas/dictionary-tag-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-tag-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTagBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing tag information for terms and kanji.",
diff --git a/ext/data/schemas/dictionary-term-bank-v1-schema.json b/ext/data/schemas/dictionary-term-bank-v1-schema.json
index 9366e9ff..ab4c49f6 100644
--- a/ext/data/schemas/dictionary-term-bank-v1-schema.json
+++ b/ext/data/schemas/dictionary-term-bank-v1-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTermBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing term information.",
diff --git a/ext/data/schemas/dictionary-term-bank-v3-schema.json b/ext/data/schemas/dictionary-term-bank-v3-schema.json
index 335144c7..7d0b4868 100644
--- a/ext/data/schemas/dictionary-term-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-term-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTermBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"structuredContent": {
diff --git a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
index eb4d3fed..86e4af93 100644
--- a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTermMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json
index 601f5d06..8ccbfa94 100644
--- a/ext/data/schemas/options-schema.json
+++ b/ext/data/schemas/options-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "options",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
diff --git a/ext/js/language/dictionary-importer.js b/ext/js/language/dictionary-importer.js
index 718d9f1c..0cf3d5f5 100644
--- a/ext/js/language/dictionary-importer.js
+++ b/ext/js/language/dictionary-importer.js
@@ -18,7 +18,6 @@
/* global
* JSZip
- * JsonSchema
* MediaUtil
*/
@@ -51,8 +50,10 @@ class DictionaryImporter {
const index = JSON.parse(await indexFile.async('string'));
- const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
- this._validateJsonSchema(index, indexSchema, indexFileName);
+ const ajvSchemas = await import('/lib/validate-schemas.js');
+ if (!ajvSchemas.dictionaryIndex(index)) {
+ throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName);
+ }
const dictionaryTitle = index.title;
const version = index.format || index.version;
@@ -75,8 +76,7 @@ class DictionaryImporter {
// Load schemas
this._progressNextStep(0);
- const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
- const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));
+ const dataBankSchemas = this._getDataBankSchemas(version);
// Files
const termFiles = this._getArchiveFiles(archive, 'term_bank_?.json');
@@ -87,11 +87,11 @@ class DictionaryImporter {
// Load data
this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length);
- const termList = await this._readFileSequence(termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
- const termMetaList = await this._readFileSequence(termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
- const kanjiList = await this._readFileSequence(kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
- const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
- const tagList = await this._readFileSequence(tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
+ const termList = await this._readFileSequence(ajvSchemas, termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
+ const termMetaList = await this._readFileSequence(ajvSchemas, termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
+ const kanjiList = await this._readFileSequence(ajvSchemas, kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
+ const kanjiMetaList = await this._readFileSequence(ajvSchemas, kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
+ const tagList = await this._readFileSequence(ajvSchemas, tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
this._addOldIndexTags(index, tagList, dictionaryTitle);
// Prefix wildcard support
@@ -214,68 +214,27 @@ class DictionaryImporter {
return summary;
}
- async _getSchema(fileName) {
- const schema = await this._fetchJsonAsset(fileName);
- return new JsonSchema(schema);
- }
-
- _validateJsonSchema(value, schema, fileName) {
- try {
- schema.validate(value);
- } catch (e) {
- throw this._formatSchemaError(e, fileName);
- }
- }
-
- _formatSchemaError(e, fileName) {
- const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary');
- const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema');
-
- const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
- e2.data = e;
+ _formatAjvSchemaError(schema, fileName) {
+ const e2 = new Error(`Dictionary has invalid data in '${fileName}'`);
+ e2.data = schema.errors;
return e2;
}
- _getSchemaErrorPathString(infoList, base='') {
- let result = base;
- for (const {path} of infoList) {
- const pathArray = Array.isArray(path) ? path : [path];
- for (const pathPart of pathArray) {
- if (pathPart === null) {
- result = base;
- } else {
- switch (typeof pathPart) {
- case 'string':
- if (result.length > 0) {
- result += '.';
- }
- result += pathPart;
- break;
- case 'number':
- result += `[${pathPart}]`;
- break;
- }
- }
- }
- }
- return result;
- }
-
- _getDataBankSchemaPaths(version) {
+ _getDataBankSchemas(version) {
const termBank = (
version === 1 ?
- '/data/schemas/dictionary-term-bank-v1-schema.json' :
- '/data/schemas/dictionary-term-bank-v3-schema.json'
+ 'dictionaryTermBankV1' :
+ 'dictionaryTermBankV3'
);
- const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json';
+ const termMetaBank = 'dictionaryTermMetaBankV3';
const kanjiBank = (
version === 1 ?
- '/data/schemas/dictionary-kanji-bank-v1-schema.json' :
- '/data/schemas/dictionary-kanji-bank-v3-schema.json'
+ 'dictionaryKanjiBankV1' :
+ 'dictionaryKanjiBankV3'
);
- const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json';
- const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json';
+ const kanjiMetaBank = 'dictionaryKanjiMetaBankV3';
+ const tagBank = 'dictionaryTagBankV3';
return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
}
@@ -539,28 +498,20 @@ class DictionaryImporter {
return results;
}
- async _readFileSequence(files, convertEntry, schema, dictionaryTitle) {
+ async _readFileSequence(ajvSchemas, files, convertEntry, schemaName, dictionaryTitle) {
const progressData = this._progressData;
- let count = 0;
let startIndex = 0;
- if (typeof this._onProgress === 'function') {
- schema.progressInterval = 1000;
- schema.progress = (s) => {
- const index = s.getValueStackLength() > 1 ? s.getValueStackItem(1).path : 0;
- progressData.index = startIndex + (index / count);
- this._progress();
- };
- }
const results = [];
for (const file of files) {
const entries = JSON.parse(await file.async('string'));
- count = Array.isArray(entries) ? Math.max(entries.length, 1) : 1;
startIndex = progressData.index;
this._progress();
- this._validateJsonSchema(entries, schema, file.name);
+ if (!ajvSchemas[schemaName](entries)) {
+ throw this._formatAjvSchemaError(ajvSchemas[schemaName], file.name);
+ }
progressData.index = startIndex + 1;
this._progress();
diff --git a/ext/lib/ucs2length.js b/ext/lib/ucs2length.js
new file mode 100644
index 00000000..120a64d4
--- /dev/null
+++ b/ext/lib/ucs2length.js
@@ -0,0 +1,16 @@
+export default function ucs2length(str) {
+ const len = str.length;
+ let length = 0;
+ let pos = 0;
+ let value;
+ while (pos < len) {
+ length++;
+ value = str.charCodeAt(pos++);
+ if (value >= 0xd800 && value <= 0xdbff && pos < len) {
+ // high surrogate, and there is a next character
+ value = str.charCodeAt(pos);
+ if ((value & 0xfc00) === 0xdc00) pos++; // low surrogate
+ }
+ }
+ return length;
+}