summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-01-10 14:43:06 -0500
committer GitHub <noreply@github.com> 2021-01-10 14:43:06 -0500
commit f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch)
tree 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /test
parent 083da93142ec6302021ee1c29428121b54fc9e68 (diff)
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'test')
-rw-r--r-- test/test-document-util.js 23
-rw-r--r-- test/test-options-util.js 17
2 files changed, 38 insertions, 2 deletions
diff --git a/test/test-document-util.js b/test/test-document-util.js
index 09f0c5e7..2311e25f 100644
--- a/test/test-document-util.js
+++ b/test/test-document-util.js
@@ -181,8 +181,29 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
}
if (source === null) { continue; }
+ // Sentence info
+ const terminatorString = '…。..??!!';
+ const terminatorMap = new Map();
+ for (const char of terminatorString) {
+ terminatorMap.set(char, [false, true]);
+ }
+ const quoteArray = [['「', '」'], ['『', '』'], ['\'', '\''], ['"', '"']];
+ const forwardQuoteMap = new Map();
+ const backwardQuoteMap = new Map();
+ for (const [char1, char2] of quoteArray) {
+ forwardQuoteMap.set(char1, [char2, false]);
+ backwardQuoteMap.set(char2, [char1, false]);
+ }
+
// Test docSentenceExtract
- const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;
+ const sentenceActual = documentUtil.extractSentence(
+ source,
+ false,
+ sentenceScanExtent,
+ terminatorMap,
+ forwardQuoteMap,
+ backwardQuoteMap
+ ).text;
assert.strictEqual(sentenceActual, sentence);
// Clean
diff --git a/test/test-options-util.js b/test/test-options-util.js
index 8b7254c6..5ce9313e 100644
--- a/test/test-options-util.js
+++ b/test/test-options-util.js
@@ -420,7 +420,22 @@ function createProfileOptionsUpdatedTestData1() {
fieldTemplates: null
},
sentenceParsing: {
- scanExtent: 200
+ scanExtent: 200,
+ enableTerminationCharacters: true,
+ terminationCharacters: [
+ {enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false},
+ {enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false},
+ {enabled: true, character1: '"', character2: '"', includeCharacterAtStart: false, includeCharacterAtEnd: false},
+ {enabled: true, character1: '\'', character2: '\'', includeCharacterAtStart: false, includeCharacterAtEnd: false},
+ {enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '。', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
+ {enabled: true, character1: '…', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}
+ ]
}
};
}