From 648c0add5957e8085ebaebb1c91c76ead2a86a06 Mon Sep 17 00:00:00 2001
From: LoneRifle
Date: Sat, 27 Jul 2019 15:45:53 +0800
Subject: [PATCH] DetectTOC: only go for lines containing '...' words

DetectTOC will work on all lines, shaving off numbers from
the last of the words in a given line, so long as the word
is not all full-stops.

This implies that a TOC line is one that contains strings
containing only full-stops, and so, DetectTOC should only work
on such lines.

This change will remove unwanted behaviour where DetectTOC
removes trailing numbers that we actually want to keep in
lines, eg:

Case Number : ABC 12/1234
---
 .../models/transformations/lineitem/DetectTOC.jsx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/javascript/models/transformations/lineitem/DetectTOC.jsx b/src/javascript/models/transformations/lineitem/DetectTOC.jsx
index 46525f52..bfc2979c 100644
--- a/src/javascript/models/transformations/lineitem/DetectTOC.jsx
+++ b/src/javascript/models/transformations/lineitem/DetectTOC.jsx
@@ -30,8 +30,10 @@ export default class DetectTOC extends ToLineItemTransformation {
             const pageTocLinks = [];
             var lastWordsWithoutNumber;
             var lastLine;
-            //find lines ending with a number per page
-            page.items.forEach(line => {
+            // find lines with words containing only "." ...
+            const tocLines = page.items.filter(line => line.words.some(word => hasOnly(word.string, '.')));
+            // ... and ending with a number per page
+            tocLines.forEach(line => {
                 var words = line.words.filter(word => !hasOnly(word.string, '.'));
                 const digits = [];
                 while (words.length > 0 && isNumber(words[words.length - 1].string)) {