diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-10 14:43:06 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-10 14:43:06 -0500 |
commit | f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch) | |
tree | 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /test/test-document-util.js | |
parent | 083da93142ec6302021ee1c29428121b54fc9e68 (diff) |
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'test/test-document-util.js')
-rw-r--r-- | test/test-document-util.js | 23 |
1 files changed, 22 insertions, 1 deletions
```diff
diff --git a/test/test-document-util.js b/test/test-document-util.js
index 09f0c5e7..2311e25f 100644
--- a/test/test-document-util.js
+++ b/test/test-document-util.js
@@ -181,8 +181,29 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
         }
         if (source === null) { continue; }
 
+        // Sentence info
+        const terminatorString = '…。..??!!';
+        const terminatorMap = new Map();
+        for (const char of terminatorString) {
+            terminatorMap.set(char, [false, true]);
+        }
+        const quoteArray = [['「', '」'], ['『', '』'], ['\'', '\''], ['"', '"']];
+        const forwardQuoteMap = new Map();
+        const backwardQuoteMap = new Map();
+        for (const [char1, char2] of quoteArray) {
+            forwardQuoteMap.set(char1, [char2, false]);
+            backwardQuoteMap.set(char2, [char1, false]);
+        }
+
         // Test docSentenceExtract
-        const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;
+        const sentenceActual = documentUtil.extractSentence(
+            source,
+            false,
+            sentenceScanExtent,
+            terminatorMap,
+            forwardQuoteMap,
+            backwardQuoteMap
+        ).text;
         assert.strictEqual(sentenceActual, sentence);
 
         // Clean
```