mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-08-21 18:08:37 +00:00
ATS-480 : Update to Tika 1.21 and matching POI (#93)
- Upgrade Tika.
- Upgrade POI.
- Fix/update the test resource for PDF parsing (multi-page PDF parsing was changed in tika-parsers 1.21).
This commit is contained in:
@@ -14,7 +14,7 @@
|
|||||||
<properties>
|
<properties>
|
||||||
<image.name>alfresco/alfresco-tika</image.name>
|
<image.name>alfresco/alfresco-tika</image.name>
|
||||||
<image.registry>quay.io</image.registry>
|
<image.registry>quay.io</image.registry>
|
||||||
<dependency.poi.version>3.17</dependency.poi.version>
|
<dependency.poi.version>4.0.1</dependency.poi.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
@@ -56,12 +56,12 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.tika</groupId>
|
<groupId>org.apache.tika</groupId>
|
||||||
<artifactId>tika-core</artifactId>
|
<artifactId>tika-core</artifactId>
|
||||||
<version>1.17-20180201-alfresco-patched</version>
|
<version>1.21-20190624-alfresco-patched</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.tika</groupId>
|
<groupId>org.apache.tika</groupId>
|
||||||
<artifactId>tika-parsers</artifactId>
|
<artifactId>tika-parsers</artifactId>
|
||||||
<version>1.17-20180201-alfresco-patched</version>
|
<version>1.21-20190624-alfresco-patched</version>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<groupId>com.tdunning</groupId>
|
<groupId>com.tdunning</groupId>
|
||||||
|
@@ -26,6 +26,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transformer;
|
package org.alfresco.transformer;
|
||||||
|
|
||||||
|
import static java.nio.file.Files.readAllBytes;
|
||||||
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
|
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
|
||||||
import static org.alfresco.transformer.executors.Tika.CSV;
|
import static org.alfresco.transformer.executors.Tika.CSV;
|
||||||
import static org.alfresco.transformer.executors.Tika.DOC;
|
import static org.alfresco.transformer.executors.Tika.DOC;
|
||||||
@@ -58,6 +59,7 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
|
|||||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
|
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XHTML;
|
||||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
|
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
|
||||||
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
|
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_ZIP;
|
||||||
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
@@ -79,8 +81,6 @@ import static org.springframework.util.StringUtils.getFilenameExtension;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
@@ -212,9 +212,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check the supplied source file has not been changed.
|
// Check the supplied source file has not been changed.
|
||||||
byte[] actualSourceFileBytes = Files.readAllBytes(new File(actualSource).toPath());
|
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
|
||||||
assertTrue("Source file is not the same",
|
assertArrayEquals("Source file is not the same", expectedSourceFileBytes,
|
||||||
Arrays.equals(expectedSourceFileBytes, actualSourceFileBytes));
|
actualSourceFileBytes);
|
||||||
|
|
||||||
return mockExecutionResult;
|
return mockExecutionResult;
|
||||||
});
|
});
|
||||||
|
@@ -2,5 +2,7 @@
|
|||||||
The quick brown fox jumps over the lazy dog
|
The quick brown fox jumps over the lazy dog
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Blank Page
|
Blank Page
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user