From 411a7bd508f5d90d5adb988d238b9c99f7a3d66a Mon Sep 17 00:00:00 2001 From: "Cezar.Leahu" Date: Fri, 13 Sep 2019 12:50:37 +0300 Subject: [PATCH 1/4] ATS-515: Default options for TIKA when called through ATS - add default "UTF-8" target encoding - restore previously default values for the other options --- .../org/alfresco/transformer/TikaController.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java b/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java index d223f0c0..4b0182a3 100644 --- a/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java +++ b/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java @@ -26,6 +26,7 @@ */ package org.alfresco.transformer; +import static java.lang.Boolean.parseBoolean; import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS; import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT; import static org.alfresco.transformer.executors.Tika.PDF_BOX; @@ -36,7 +37,6 @@ import static org.alfresco.transformer.fs.FileManager.createSourceFile; import static org.alfresco.transformer.fs.FileManager.createTargetFile; import static org.alfresco.transformer.fs.FileManager.createTargetFileName; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN; -import static org.alfresco.transformer.util.Util.stringToBoolean; import static org.springframework.http.HttpStatus.OK; import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE; @@ -173,16 +173,18 @@ public class TikaController extends AbstractTransformerController logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" + " '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout); - final Boolean includeContents = stringToBoolean(transformOptions.get("includeContents")); - final Boolean notExtractBookmarksText = stringToBoolean( - transformOptions.get("notExtractBookmarksText")); - final String targetEncoding = transformOptions.get("targetEncoding"); + final boolean includeContents = parseBoolean( + transformOptions.getOrDefault("includeContents", "false")); + final boolean notExtractBookmarksText = parseBoolean( + transformOptions.getOrDefault("notExtractBookmarksText", "false")); + final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8"); final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype, transformOptions); + javaExecutor.call(sourceFile, targetFile, transform, - includeContents != null && includeContents ? INCLUDE_CONTENTS : null, - notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null, + includeContents ? INCLUDE_CONTENTS : null, + notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding); } } From 7a114062e4e24e6a99c12490bae6241cb75b0e78 Mon Sep 17 00:00:00 2001 From: "Cezar.Leahu" Date: Fri, 13 Sep 2019 15:05:51 +0300 Subject: [PATCH 2/4] ATS-515: Default "targetEncoding=UTF-8" option for TIKA --- .../main/java/org/alfresco/transformer/TikaController.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java b/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java index 4b0182a3..a78e9880 100644 --- a/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java +++ b/alfresco-docker-tika/src/main/java/org/alfresco/transformer/TikaController.java @@ -122,7 +122,7 @@ public class TikaController extends AbstractTransformerController @RequestParam("sourceMimetype") final String sourceMimetype, @RequestParam("targetExtension") final String targetExtension, @RequestParam("targetMimetype") final String targetMimetype, - @RequestParam("targetEncoding") final String targetEncoding, + @RequestParam(value = "targetEncoding", required = false, defaultValue = "UTF-8") final String targetEncoding, @RequestParam(value = "timeout", required = false) final Long timeout, @RequestParam(value = "testDelay", required = false) final Long testDelay, @@ -181,7 +181,7 @@ public class TikaController extends AbstractTransformerController final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype, transformOptions); - + javaExecutor.call(sourceFile, targetFile, transform, includeContents ? INCLUDE_CONTENTS : null, notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null, From 0e09a0c41539a5b66e20da34af724292136f47da Mon Sep 17 00:00:00 2001 From: "Cezar.Leahu" Date: Fri, 13 Sep 2019 15:48:22 +0300 Subject: [PATCH 3/4] ATS-515: Minor test update --- .../test/java/org/alfresco/transformer/TikaControllerTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java b/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java index 0405b9e4..c573ee6d 100644 --- a/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java +++ b/alfresco-docker-tika/src/test/java/org/alfresco/transformer/TikaControllerTest.java @@ -139,7 +139,6 @@ public class TikaControllerTest extends AbstractTransformerControllerTest @SpyBean private TikaController controller; - private String transform = PDF_BOX; private String targetEncoding = "UTF-8"; private String targetMimetype = MIMETYPE_TEXT_PLAIN; From cfcb8bc907424f53b433e9efd79ddb2c24458d11 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2019 13:48:21 +0000 Subject: [PATCH 4/4] Bump spring-boot-starter-parent from 2.1.7.RELEASE to 2.1.8.RELEASE Bumps spring-boot-starter-parent from 2.1.7.RELEASE to 2.1.8.RELEASE. Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 356fdc1f..35876733 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ org.springframework.boot spring-boot-starter-parent - 2.1.7.RELEASE + 2.1.8.RELEASE