ATS-480 : Update to Tika 1.21 and matching POI (#93)

- Upgrade Tika to 1.21
- Upgrade POI to the matching version
- Fix/update the test resource for PDF parsing
  (multi-page PDF parsing was changed in tika-parsers 1.21)
This commit is contained in:
CezarLeahu
2019-08-20 22:20:47 +03:00
committed by GitHub
parent 18973d9ca5
commit 3c977bd914
3 changed files with 10 additions and 8 deletions

View File

@@ -26,6 +26,7 @@
*/
package org.alfresco.transformer;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
import static org.alfresco.transformer.executors.Tika.DOC;
@@ -58,6 +59,7 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@@ -79,8 +81,6 @@ import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
@@ -212,9 +212,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
}
// Check the supplied source file has not been changed.
byte[] actualSourceFileBytes = Files.readAllBytes(new File(actualSource).toPath());
assertTrue("Source file is not the same",
Arrays.equals(expectedSourceFileBytes, actualSourceFileBytes));
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
assertArrayEquals("Source file is not the same", expectedSourceFileBytes,
actualSourceFileBytes);
return mockExecutionResult;
});

View File

@@ -2,5 +2,7 @@
The quick brown fox jumps over the lazy dog
Blank Page