summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Darius Jahandarie <djahandarie@gmail.com> 2023-11-03 23:32:33 +0900
committer: Darius Jahandarie <djahandarie@gmail.com> 2023-11-03 23:52:30 +0900
commit: 376151096431d4362e4baaacf0cef4a534e169f7 (patch)
tree: 18536a224fd4d833aaf691d0363a3a4b59175dd7
parent: b64f51c3b13a46af4dd7f1e43048ac19c781ca7b (diff)
Replace JsonSchema with ajv for dictionary validation
-rw-r--r-- .eslintrc.json 5
-rw-r--r-- .gitignore 1
-rw-r--r-- dev/build.js 16
-rw-r--r-- ext/data/schemas/custom-audio-list-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-index-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-kanji-bank-v1-schema.json 3
-rw-r--r-- ext/data/schemas/dictionary-kanji-bank-v3-schema.json 3
-rw-r--r-- ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-tag-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-term-bank-v1-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-term-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/dictionary-term-meta-bank-v3-schema.json 1
-rw-r--r-- ext/data/schemas/options-schema.json 1
-rw-r--r-- ext/js/language/dictionary-importer.js 99
-rw-r--r-- ext/lib/ucs2length.js 16
-rw-r--r-- package-lock.json 33
-rw-r--r-- package.json 3
17 files changed, 106 insertions, 81 deletions
diff --git a/.eslintrc.json b/.eslintrc.json
index 56bbcf09..a7fb842b 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -5,7 +5,7 @@
"plugin:jsonc/recommended-with-json"
],
"parserOptions": {
- "ecmaVersion": 9,
+ "ecmaVersion": 11,
"sourceType": "script",
"ecmaFeatures": {
"globalReturn": false,
@@ -401,7 +401,8 @@
"DynamicProperty": "readonly",
"EventDispatcher": "readonly",
"EventListenerCollection": "readonly",
- "Logger": "readonly"
+ "Logger": "readonly",
+ "import": "readonly"
}
},
{
diff --git a/.gitignore b/.gitignore
index 405fead0..426db4ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ dictionaries/
/playwright/.cache/
/test/playwright/__screenshots__/
ext/manifest.json
+ext/lib/validate-schemas.js
diff --git a/dev/build.js b/dev/build.js
index 24b1e2d0..3bfb5418 100644
--- a/dev/build.js
+++ b/dev/build.js
@@ -24,7 +24,8 @@ const childProcess = require('child_process');
const util = require('./util');
const {getAllFiles, getArgs, testMain} = util;
const {ManifestUtil} = require('./manifest-util');
-
+const Ajv = require('ajv');
+const standaloneCode = require('ajv/dist/standalone').default;
async function createZip(directory, excludeFiles, outputFileName, sevenZipExes, onUpdate, dryRun) {
try {
@@ -130,6 +131,19 @@ async function build(buildDir, extDir, manifestUtil, variantNames, manifestPath,
process.stdout.write(message);
};
+ process.stdout.write('Building schema validators using ajv\n');
+ const schemaDir = path.join(extDir, 'data/schemas/');
+ const schemaFileNames = fs.readdirSync(schemaDir);
+ const schemas = schemaFileNames.map((schemaFileName) => JSON.parse(fs.readFileSync(path.join(schemaDir, schemaFileName))));
+ const ajv = new Ajv({schemas: schemas, code: {source: true, esm: true}});
+ const moduleCode = standaloneCode(ajv);
+
+ // https://github.com/ajv-validator/ajv/issues/2209
+ const patchedModuleCode = moduleCode.replaceAll('require("ajv/dist/runtime/ucs2length").default', 'import("/lib/ucs2length.js").default');
+
+ fs.writeFileSync(path.join(extDir, 'lib/validate-schemas.js'), patchedModuleCode);
+
+
process.stdout.write(`Version: ${yomitanVersion}...\n`);
for (const variantName of variantNames) {
diff --git a/ext/data/schemas/custom-audio-list-schema.json b/ext/data/schemas/custom-audio-list-schema.json
index 2cb3ca78..885ad087 100644
--- a/ext/data/schemas/custom-audio-list-schema.json
+++ b/ext/data/schemas/custom-audio-list-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "customAudioList",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
diff --git a/ext/data/schemas/dictionary-index-schema.json b/ext/data/schemas/dictionary-index-schema.json
index a8ca0f23..98b27143 100644
--- a/ext/data/schemas/dictionary-index-schema.json
+++ b/ext/data/schemas/dictionary-index-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryIndex",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"description": "Index file containing information about the data contained in the dictionary.",
diff --git a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json
index 5aca2d6a..d506a19d 100644
--- a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json
+++ b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryKanjiBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
@@ -30,4 +31,4 @@
"description": "A meaning for the kanji character."
}
}
-}
\ No newline at end of file
+}
diff --git a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json
index ee508294..763ce3b1 100644
--- a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryKanjiBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
@@ -42,4 +43,4 @@
}
]
}
-}
\ No newline at end of file
+}
diff --git a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
index e478de93..d8f5031b 100644
--- a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryKanjiMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
diff --git a/ext/data/schemas/dictionary-tag-bank-v3-schema.json b/ext/data/schemas/dictionary-tag-bank-v3-schema.json
index f7721119..ab6e3377 100644
--- a/ext/data/schemas/dictionary-tag-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-tag-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTagBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing tag information for terms and kanji.",
diff --git a/ext/data/schemas/dictionary-term-bank-v1-schema.json b/ext/data/schemas/dictionary-term-bank-v1-schema.json
index 9366e9ff..ab4c49f6 100644
--- a/ext/data/schemas/dictionary-term-bank-v1-schema.json
+++ b/ext/data/schemas/dictionary-term-bank-v1-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTermBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing term information.",
diff --git a/ext/data/schemas/dictionary-term-bank-v3-schema.json b/ext/data/schemas/dictionary-term-bank-v3-schema.json
index 335144c7..7d0b4868 100644
--- a/ext/data/schemas/dictionary-term-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-term-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTermBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"structuredContent": {
diff --git a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
index eb4d3fed..86e4af93 100644
--- a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
+++ b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "dictionaryTermMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json
index 601f5d06..8ccbfa94 100644
--- a/ext/data/schemas/options-schema.json
+++ b/ext/data/schemas/options-schema.json
@@ -1,4 +1,5 @@
{
+ "$id": "options",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
diff --git a/ext/js/language/dictionary-importer.js b/ext/js/language/dictionary-importer.js
index 718d9f1c..0cf3d5f5 100644
--- a/ext/js/language/dictionary-importer.js
+++ b/ext/js/language/dictionary-importer.js
@@ -18,7 +18,6 @@
/* global
* JSZip
- * JsonSchema
* MediaUtil
*/
@@ -51,8 +50,10 @@ class DictionaryImporter {
const index = JSON.parse(await indexFile.async('string'));
- const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
- this._validateJsonSchema(index, indexSchema, indexFileName);
+ const ajvSchemas = await import('/lib/validate-schemas.js');
+ if (!ajvSchemas.dictionaryIndex(index)) {
+ throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName);
+ }
const dictionaryTitle = index.title;
const version = index.format || index.version;
@@ -75,8 +76,7 @@ class DictionaryImporter {
// Load schemas
this._progressNextStep(0);
- const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
- const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));
+ const dataBankSchemas = this._getDataBankSchemas(version);
// Files
const termFiles = this._getArchiveFiles(archive, 'term_bank_?.json');
@@ -87,11 +87,11 @@ class DictionaryImporter {
// Load data
this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length);
- const termList = await this._readFileSequence(termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
- const termMetaList = await this._readFileSequence(termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
- const kanjiList = await this._readFileSequence(kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
- const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
- const tagList = await this._readFileSequence(tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
+ const termList = await this._readFileSequence(ajvSchemas, termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
+ const termMetaList = await this._readFileSequence(ajvSchemas, termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
+ const kanjiList = await this._readFileSequence(ajvSchemas, kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
+ const kanjiMetaList = await this._readFileSequence(ajvSchemas, kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
+ const tagList = await this._readFileSequence(ajvSchemas, tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
this._addOldIndexTags(index, tagList, dictionaryTitle);
// Prefix wildcard support
@@ -214,68 +214,27 @@ class DictionaryImporter {
return summary;
}
- async _getSchema(fileName) {
- const schema = await this._fetchJsonAsset(fileName);
- return new JsonSchema(schema);
- }
-
- _validateJsonSchema(value, schema, fileName) {
- try {
- schema.validate(value);
- } catch (e) {
- throw this._formatSchemaError(e, fileName);
- }
- }
-
- _formatSchemaError(e, fileName) {
- const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary');
- const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema');
-
- const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
- e2.data = e;
+ _formatAjvSchemaError(schema, fileName) {
+ const e2 = new Error(`Dictionary has invalid data in '${fileName}'`);
+ e2.data = schema.errors;
return e2;
}
- _getSchemaErrorPathString(infoList, base='') {
- let result = base;
- for (const {path} of infoList) {
- const pathArray = Array.isArray(path) ? path : [path];
- for (const pathPart of pathArray) {
- if (pathPart === null) {
- result = base;
- } else {
- switch (typeof pathPart) {
- case 'string':
- if (result.length > 0) {
- result += '.';
- }
- result += pathPart;
- break;
- case 'number':
- result += `[${pathPart}]`;
- break;
- }
- }
- }
- }
- return result;
- }
-
- _getDataBankSchemaPaths(version) {
+ _getDataBankSchemas(version) {
const termBank = (
version === 1 ?
- '/data/schemas/dictionary-term-bank-v1-schema.json' :
- '/data/schemas/dictionary-term-bank-v3-schema.json'
+ 'dictionaryTermBankV1' :
+ 'dictionaryTermBankV3'
);
- const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json';
+ const termMetaBank = 'dictionaryTermMetaBankV3';
const kanjiBank = (
version === 1 ?
- '/data/schemas/dictionary-kanji-bank-v1-schema.json' :
- '/data/schemas/dictionary-kanji-bank-v3-schema.json'
+ 'dictionaryKanjiBankV1' :
+ 'dictionaryKanjiBankV3'
);
- const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json';
- const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json';
+ const kanjiMetaBank = 'dictionaryKanjiMetaBankV3';
+ const tagBank = 'dictionaryTagBankV3';
return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
}
@@ -539,28 +498,20 @@ class DictionaryImporter {
return results;
}
- async _readFileSequence(files, convertEntry, schema, dictionaryTitle) {
+ async _readFileSequence(ajvSchemas, files, convertEntry, schemaName, dictionaryTitle) {
const progressData = this._progressData;
- let count = 0;
let startIndex = 0;
- if (typeof this._onProgress === 'function') {
- schema.progressInterval = 1000;
- schema.progress = (s) => {
- const index = s.getValueStackLength() > 1 ? s.getValueStackItem(1).path : 0;
- progressData.index = startIndex + (index / count);
- this._progress();
- };
- }
const results = [];
for (const file of files) {
const entries = JSON.parse(await file.async('string'));
- count = Array.isArray(entries) ? Math.max(entries.length, 1) : 1;
startIndex = progressData.index;
this._progress();
- this._validateJsonSchema(entries, schema, file.name);
+ if (!ajvSchemas[schemaName](entries)) {
+ throw this._formatAjvSchemaError(ajvSchemas[schemaName], file.name);
+ }
progressData.index = startIndex + 1;
this._progress();
diff --git a/ext/lib/ucs2length.js b/ext/lib/ucs2length.js
new file mode 100644
index 00000000..120a64d4
--- /dev/null
+++ b/ext/lib/ucs2length.js
@@ -0,0 +1,16 @@
+export default function ucs2length(str) { // Returns the length of `str` where a high+low surrogate pair counts as one.
+ const len = str.length; // length in UTF-16 code units
+ let length = 0; // running count that is returned
+ let pos = 0; // index of the next code unit to read
+ let value;
+ while (pos < len) {
+ length++;
+ value = str.charCodeAt(pos++);
+ if (value >= 0xd800 && value <= 0xdbff && pos < len) {
+ // high surrogate, and there is a next character
+ value = str.charCodeAt(pos);
+ if ((value & 0xfc00) === 0xdc00) pos++; // low surrogate: skip it so the pair is counted once
+ }
+ }
+ return length;
+}
diff --git a/package-lock.json b/package-lock.json
index 24e49c86..6f6581b7 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -11,7 +11,8 @@
"license": "GPL-3.0-or-later",
"devDependencies": {
"@playwright/test": "^1.39.0",
- "ajv": "^8.11.0",
+ "@types/node": "^20.8.10",
+ "ajv": "^8.12.0",
"browserify": "^17.0.0",
"css": "^3.0.0",
"eslint": "^8.52.0",
@@ -523,6 +524,15 @@
"integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==",
"dev": true
},
+ "node_modules/@types/node": {
+ "version": "20.8.10",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.10.tgz",
+ "integrity": "sha512-TlgT8JntpcbmKUFzjhsyhGfP2fsiz1Mv56im6enJ905xG1DAYesxJaeSbGqQmAw8OWPdhyJGhGSQGKRNJ45u9w==",
+ "dev": true,
+ "dependencies": {
+ "undici-types": "~5.26.4"
+ }
+ },
"node_modules/@types/normalize-package-data": {
"version": "2.4.1",
"resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz",
@@ -5246,6 +5256,12 @@
"undeclared-identifiers": "bin.js"
}
},
+ "node_modules/undici-types": {
+ "version": "5.26.5",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+ "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+ "dev": true
+ },
"node_modules/universalify": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz",
@@ -5985,6 +6001,15 @@
"integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==",
"dev": true
},
+ "@types/node": {
+ "version": "20.8.10",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.10.tgz",
+ "integrity": "sha512-TlgT8JntpcbmKUFzjhsyhGfP2fsiz1Mv56im6enJ905xG1DAYesxJaeSbGqQmAw8OWPdhyJGhGSQGKRNJ45u9w==",
+ "dev": true,
+ "requires": {
+ "undici-types": "~5.26.4"
+ }
+ },
"@types/normalize-package-data": {
"version": "2.4.1",
"resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz",
@@ -9581,6 +9606,12 @@
"xtend": "^4.0.1"
}
},
+ "undici-types": {
+ "version": "5.26.5",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+ "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+ "dev": true
+ },
"universalify": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz",
diff --git a/package.json b/package.json
index 22f5bd56..ee95d388 100644
--- a/package.json
+++ b/package.json
@@ -36,7 +36,8 @@
},
"devDependencies": {
"@playwright/test": "^1.39.0",
- "ajv": "^8.11.0",
+ "@types/node": "^20.8.10",
+ "ajv": "^8.12.0",
"browserify": "^17.0.0",
"css": "^3.0.0",
"eslint": "^8.52.0",