Merge pull request #115 from Alfresco/ATS-515

ATS-515: Default options for Tika when called through ATS
This commit is contained in:
Lucian Tuca 2019-09-13 13:36:16 +03:00 committed by GitHub
commit 6d8e1b7ca9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -26,6 +26,7 @@
*/ */
package org.alfresco.transformer; package org.alfresco.transformer;
import static java.lang.Boolean.parseBoolean;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS; import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT; import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.PDF_BOX; import static org.alfresco.transformer.executors.Tika.PDF_BOX;
@ -36,7 +37,6 @@ import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile; import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName; import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.Util.stringToBoolean;
import static org.springframework.http.HttpStatus.OK; import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE; import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
@ -173,16 +173,18 @@ public class TikaController extends AbstractTransformerController
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" + logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout); " '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
final Boolean includeContents = stringToBoolean(transformOptions.get("includeContents")); final boolean includeContents = parseBoolean(
final Boolean notExtractBookmarksText = stringToBoolean( transformOptions.getOrDefault("includeContents", "false"));
transformOptions.get("notExtractBookmarksText")); final boolean notExtractBookmarksText = parseBoolean(
final String targetEncoding = transformOptions.get("targetEncoding"); transformOptions.getOrDefault("notExtractBookmarksText", "false"));
final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype, final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions); transformOptions);
javaExecutor.call(sourceFile, targetFile, transform, javaExecutor.call(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null, includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null, notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding); TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
} }
} }