From 3c977bd914f5a2d497ee00bf1e6bbc56bac55414 Mon Sep 17 00:00:00 2001 From: CezarLeahu <35226487+CezarLeahu@users.noreply.github.com> Date: Tue, 20 Aug 2019 22:20:47 +0300 Subject: [PATCH] ATS-480 : Update to Tika 1.21 and matching POI (#93) - upgrade tika - upgrade poi - fix/update test resource for PDF parsing (multi-page PDF parsing was changed in tika-parsers 1.21) --- alfresco-docker-tika/pom.xml | 6 +++--- .../org/alfresco/transformer/TikaControllerTest.java | 10 +++++----- alfresco-docker-tika/src/test/resources/quick.txt | 2 ++ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/alfresco-docker-tika/pom.xml b/alfresco-docker-tika/pom.xml index 7e3abd4b..e797e61c 100644 --- a/alfresco-docker-tika/pom.xml +++ b/alfresco-docker-tika/pom.xml @@ -14,7 +14,7 @@ alfresco/alfresco-tika quay.io - 3.17 + 4.0.1 @@ -56,12 +56,12 @@ org.apache.tika tika-core - 1.17-20180201-alfresco-patched + 1.21-20190624-alfresco-patched org.apache.tika tika-parsers - 1.17-20180201-alfresco-patched + 1.21-20190624-alfresco-patched com.tdunning diff --git a/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java b/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java index ce54ce97..7df956b3 100644 --- a/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java +++ b/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java @@ -26,6 +26,7 @@ */ package org.alfresco.transformer; +import static java.nio.file.Files.readAllBytes; import static org.alfresco.transformer.executors.Tika.ARCHIVE; import static org.alfresco.transformer.executors.Tika.CSV; import static org.alfresco.transformer.executors.Tika.DOC; @@ -58,6 +59,7 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -79,8 +81,6 @@ import static org.springframework.util.StringUtils.getFilenameExtension; import java.io.File; import java.io.IOException; -import java.nio.file.Files; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.UUID; @@ -212,9 +212,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest } // Check the supplied source file has not been changed. - byte[] actualSourceFileBytes = Files.readAllBytes(new File(actualSource).toPath()); - assertTrue("Source file is not the same", - Arrays.equals(expectedSourceFileBytes, actualSourceFileBytes)); + byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath()); + assertArrayEquals("Source file is not the same", expectedSourceFileBytes, + actualSourceFileBytes); return mockExecutionResult; }); diff --git a/alfresco-docker-tika/src/test/resources/quick.txt b/alfresco-docker-tika/src/test/resources/quick.txt index 39deeeca..43a5841a 100644 --- a/alfresco-docker-tika/src/test/resources/quick.txt +++ b/alfresco-docker-tika/src/test/resources/quick.txt @@ -2,5 +2,7 @@ The quick brown fox jumps over the lazy dog + + Blank Page