From 3c977bd914f5a2d497ee00bf1e6bbc56bac55414 Mon Sep 17 00:00:00 2001
From: CezarLeahu <35226487+CezarLeahu@users.noreply.github.com>
Date: Tue, 20 Aug 2019 22:20:47 +0300
Subject: [PATCH] ATS-480 : Update to Tika 1.21 and matching POI (#93)
- Upgrade Tika
- Upgrade POI
- Fix/update the test resource for PDF parsing
(multi-page PDF parsing changed in tika-parsers 1.21)
---
alfresco-docker-tika/pom.xml | 6 +++---
.../org/alfresco/transformer/TikaControllerTest.java | 10 +++++-----
alfresco-docker-tika/src/test/resources/quick.txt | 2 ++
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/alfresco-docker-tika/pom.xml b/alfresco-docker-tika/pom.xml
index 7e3abd4b..e797e61c 100644
--- a/alfresco-docker-tika/pom.xml
+++ b/alfresco-docker-tika/pom.xml
@@ -14,7 +14,7 @@
alfresco/alfresco-tika
quay.io
- 3.17
+ 4.0.1
@@ -56,12 +56,12 @@
org.apache.tika
tika-core
- 1.17-20180201-alfresco-patched
+ 1.21-20190624-alfresco-patched
org.apache.tika
tika-parsers
- 1.17-20180201-alfresco-patched
+ 1.21-20190624-alfresco-patched
com.tdunning
diff --git a/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java b/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java
index ce54ce97..7df956b3 100644
--- a/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java
+++ b/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java
@@ -26,6 +26,7 @@
*/
package org.alfresco.transformer;
+import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
import static org.alfresco.transformer.executors.Tika.DOC;
@@ -58,6 +59,7 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@@ -79,8 +81,6 @@ import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
-import java.nio.file.Files;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
@@ -212,9 +212,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
}
// Check the supplied source file has not been changed.
- byte[] actualSourceFileBytes = Files.readAllBytes(new File(actualSource).toPath());
- assertTrue("Source file is not the same",
- Arrays.equals(expectedSourceFileBytes, actualSourceFileBytes));
+ byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
+ assertArrayEquals("Source file is not the same", expectedSourceFileBytes,
+ actualSourceFileBytes);
return mockExecutionResult;
});
diff --git a/alfresco-docker-tika/src/test/resources/quick.txt b/alfresco-docker-tika/src/test/resources/quick.txt
index 39deeeca..43a5841a 100644
--- a/alfresco-docker-tika/src/test/resources/quick.txt
+++ b/alfresco-docker-tika/src/test/resources/quick.txt
@@ -2,5 +2,7 @@
The quick brown fox jumps over the lazy dog
+
+
Blank Page