ATS-480 : Update to Tika 1.21 and matching POI (#93)

- upgrade tika
- upgrade poi
- fix/update test resource for PDF parsing
  (multi-page PDF parsing was changed in tika-parsers 1.21)
This commit is contained in:
CezarLeahu
2019-08-20 22:20:47 +03:00
committed by GitHub
parent 18973d9ca5
commit 3c977bd914
3 changed files with 10 additions and 8 deletions

View File

@@ -14,7 +14,7 @@
<properties>
<image.name>alfresco/alfresco-tika</image.name>
<image.registry>quay.io</image.registry>
<dependency.poi.version>3.17</dependency.poi.version>
<dependency.poi.version>4.0.1</dependency.poi.version>
</properties>
<dependencies>
@@ -56,12 +56,12 @@
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.17-20180201-alfresco-patched</version>
<version>1.21-20190624-alfresco-patched</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.17-20180201-alfresco-patched</version>
<version>1.21-20190624-alfresco-patched</version>
<exclusions>
<exclusion>
<groupId>com.tdunning</groupId>

View File

@@ -26,6 +26,7 @@
*/
package org.alfresco.transformer;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
import static org.alfresco.transformer.executors.Tika.DOC;
@@ -58,6 +59,7 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@@ -79,8 +81,6 @@ import static org.springframework.util.StringUtils.getFilenameExtension;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
@@ -212,9 +212,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
}
// Check the supplied source file has not been changed.
byte[] actualSourceFileBytes = Files.readAllBytes(new File(actualSource).toPath());
assertTrue("Source file is not the same",
Arrays.equals(expectedSourceFileBytes, actualSourceFileBytes));
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
assertArrayEquals("Source file is not the same", expectedSourceFileBytes,
actualSourceFileBytes);
return mockExecutionResult;
});

View File

@@ -2,5 +2,7 @@
The quick brown fox jumps over the lazy dog
Blank Page