summaryrefslogtreecommitdiff
path: root/test/test-document-util.js
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-01-10 14:43:06 -0500
committer: GitHub <noreply@github.com> 2021-01-10 14:43:06 -0500
commit: f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch)
tree: 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /test/test-document-util.js
parent: 083da93142ec6302021ee1c29428121b54fc9e68 (diff)
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'test/test-document-util.js')
-rw-r--r-- test/test-document-util.js 23
1 files changed, 22 insertions, 1 deletions
diff --git a/test/test-document-util.js b/test/test-document-util.js
index 09f0c5e7..2311e25f 100644
--- a/test/test-document-util.js
+++ b/test/test-document-util.js
@@ -181,8 +181,29 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
}
if (source === null) { continue; }
+ // Sentence info
+ const terminatorString = '…。..??!!';
+ const terminatorMap = new Map();
+ for (const char of terminatorString) {
+ terminatorMap.set(char, [false, true]);
+ }
+ const quoteArray = [['「', '」'], ['『', '』'], ['\'', '\''], ['"', '"']];
+ const forwardQuoteMap = new Map();
+ const backwardQuoteMap = new Map();
+ for (const [char1, char2] of quoteArray) {
+ forwardQuoteMap.set(char1, [char2, false]);
+ backwardQuoteMap.set(char2, [char1, false]);
+ }
+
// Test docSentenceExtract
- const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;
+ const sentenceActual = documentUtil.extractSentence(
+ source,
+ false,
+ sentenceScanExtent,
+ terminatorMap,
+ forwardQuoteMap,
+ backwardQuoteMap
+ ).text;
assert.strictEqual(sentenceActual, sentence);
// Clean