From 3af9d706671e0ed6ef668e0c4e83b2db08c02a28 Mon Sep 17 00:00:00 2001 From: alandavis Date: Thu, 14 Jul 2022 08:51:09 +0100 Subject: [PATCH] Save point: [skip ci] * Tika extractors no longer use Files --- .../transform/base/TransformHandler.java | 135 ++++++++++-------- ...rmer.java => AbstractTikaTransformer.java} | 65 +++------ .../tika/transformers/ArchiveTransformer.java | 4 +- .../tika/transformers/OOXMLTransformer.java | 2 +- .../tika/transformers/OfficeTransformer.java | 2 +- .../transformers/OutlookMsgTransformer.java | 2 +- .../tika/transformers/PdfBoxTransformer.java | 2 +- .../tika/transformers/PoiTransformer.java | 2 +- .../transformers/TextMiningTransformer.java | 2 +- .../transform/tika/transformers/Tika.java | 86 ++++------- .../transformers/TikaAutoTransformer.java | 2 +- .../transform/tika/TikaControllerTest.java | 3 - ....java => AbstractTikaTransformerTest.java} | 58 ++++---- 13 files changed, 159 insertions(+), 206 deletions(-) rename engines/tika/src/main/java/org/alfresco/transform/tika/transformers/{GenericTikaTransformer.java => AbstractTikaTransformer.java} (66%) rename engines/tika/src/test/java/org/alfresco/transform/tika/transformers/{GenericTikaTransformerTest.java => AbstractTikaTransformerTest.java} (62%) diff --git a/engines/base/src/main/java/org/alfresco/transform/base/TransformHandler.java b/engines/base/src/main/java/org/alfresco/transform/base/TransformHandler.java index 8e7df0ed..47083a82 100644 --- a/engines/base/src/main/java/org/alfresco/transform/base/TransformHandler.java +++ b/engines/base/src/main/java/org/alfresco/transform/base/TransformHandler.java @@ -56,6 +56,7 @@ import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBo import javax.annotation.PostConstruct; import javax.servlet.http.HttpServletRequest; +import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; @@ -187,17 +188,14 @@ public class TransformHandler } probeTestTransform.incrementTransformerCount(); - // Obtain the source final String directUrl = requestParameters.getOrDefault(DIRECT_ACCESS_URL, ""); - InputStream inputStream = directUrl.isBlank() + InputStream inputStream = new BufferedInputStream(directUrl.isBlank() ? FileManager.getMultipartFileInputStream(sourceMultipartFile) - : getDirectAccessUrlInputStream(directUrl); + : getDirectAccessUrlInputStream(directUrl)); long sourceSizeInBytes = -1L; // TODO pass in t-options or just ignore for http request as the repo will have checked. Map transformOptions = getTransformOptions(requestParameters); String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions); CustomTransformer customTransformer = getCustomTransformer(transformName); - String sourceEncoding = transformOptions.get(SOURCE_ENCODING); - String targetEncoding = transformOptions.get(TARGET_ENCODING); // TODO not normally set String reference = "e"+httpRequestCount.getAndIncrement(); transformerDebug.pushTransform(reference, sourceMimetype, targetMimetype, sourceSizeInBytes, transformName); transformerDebug.logOptions(reference, requestParameters); @@ -254,72 +252,76 @@ public class TransformHandler return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); } - String targetMimetype = request.getTargetMediaType(); - String sourceMimetype = request.getSourceMediaType(); - File targetFile = createTargetFile(null, sourceMimetype, targetMimetype); - transformerDebug.pushTransform(request); - try { - OutputStreamLengthRecorder outputStream = - new OutputStreamLengthRecorder(new BufferedOutputStream(new FileOutputStream(targetFile))); + String targetMimetype = request.getTargetMediaType(); + String sourceMimetype = request.getSourceMediaType(); + File targetFile = createTargetFile(null, sourceMimetype, targetMimetype); + transformerDebug.pushTransform(request); - long sourceSizeInBytes = request.getSourceSize(); - Map transformOptions = getTransformOptions(request.getTransformRequestOptions()); - String sourceEncoding = transformOptions.get(SOURCE_ENCODING); - String targetEncoding = transformOptions.get(TARGET_ENCODING); // TODO not normally set - transformerDebug.logOptions(request); - String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions); - CustomTransformer customTransformer = getCustomTransformer(transformName); - - TransformManagerImpl transformManager = TransformManagerImpl.builder() - .withSourceMimetype(sourceMimetype) - .withTargetMimetype(targetMimetype) - .withInputStream(inputStream) - .withOutputStream(outputStream) - .withTargetFile(targetFile) - .build(); - - customTransformer.transform(sourceMimetype, inputStream, - targetMimetype, outputStream, transformOptions, transformManager); - - transformManager.ifUsedCopyTargetFileToOutputStream(); - - reply.getInternalContext().setCurrentSourceSize(outputStream.getLength()); - - if (saveTargetFileInSharedFileStore(targetFile, reply) == false) + try (OutputStreamLengthRecorder outputStream = new OutputStreamLengthRecorder(new BufferedOutputStream( + new FileOutputStream(targetFile)))) { + long sourceSizeInBytes = request.getSourceSize(); + Map transformOptions = getTransformOptions(request.getTransformRequestOptions()); + transformerDebug.logOptions(request); + String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions); + CustomTransformer customTransformer = getCustomTransformer(transformName); + + TransformManagerImpl transformManager = TransformManagerImpl.builder() + .withSourceMimetype(sourceMimetype) + .withTargetMimetype(targetMimetype) + .withInputStream(inputStream) + .withOutputStream(outputStream) + .withTargetFile(targetFile) + .build(); + + customTransformer.transform(sourceMimetype, inputStream, + targetMimetype, outputStream, transformOptions, transformManager); + + transformManager.ifUsedCopyTargetFileToOutputStream(); + + reply.getInternalContext().setCurrentSourceSize(outputStream.getLength()); + + if (saveTargetFileInSharedFileStore(targetFile, reply) == false) + { + return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); + } + + transformManager.deleteSourceFileIfExists(); + transformManager.deleteTargetFileIfExists(); + + probeTestTransform.recordTransformTime(System.currentTimeMillis()-start); + transformerDebug.popTransform(reply); + + logger.trace("Sending successful {}, timeout {} ms", reply, timeout); return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); } + catch (TransformException e) + { + reply.setStatus(e.getStatusCode()); + reply.setErrorDetails(messageWithCause("Failed at processing transformation", e)); - transformManager.deleteSourceFileIfExists(); - transformManager.deleteTargetFileIfExists(); + transformerDebug.logFailure(reply); + logger.trace("Failed to perform transform (TransformException), sending " + reply, e); + return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); + } + catch (Exception e) + { + reply.setStatus(INTERNAL_SERVER_ERROR.value()); + reply.setErrorDetails(messageWithCause("Failed at processing transformation", e)); - probeTestTransform.recordTransformTime(System.currentTimeMillis()-start); - transformerDebug.popTransform(reply); - - logger.trace("Sending successful {}, timeout {} ms", reply, timeout); - return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); + transformerDebug.logFailure(reply); + logger.trace("Failed to perform transform (Exception), sending " + reply, e); + return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); + } } - catch (TransformException e) + finally { - reply.setStatus(e.getStatusCode()); - reply.setErrorDetails(messageWithCause("Failed at processing transformation", e)); - - transformerDebug.logFailure(reply); - logger.trace("Failed to perform transform (TransformException), sending " + reply, e); - return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); - } - catch (Exception e) - { - reply.setStatus(INTERNAL_SERVER_ERROR.value()); - reply.setErrorDetails(messageWithCause("Failed at processing transformation", e)); - - transformerDebug.logFailure(reply); - logger.trace("Failed to perform transform (Exception), sending " + reply, e); - return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); + closeInputStreamWithoutException(inputStream); } } + private boolean isTransformRequestValid(TransformRequest request, TransformReply reply) { final Errors errors = validateTransformRequest(request); @@ -412,9 +414,9 @@ public class TransformHandler InputStream inputStream = null; try { - inputStream = directUrl.isBlank() + inputStream = new BufferedInputStream(directUrl.isBlank() ? getSharedFileStoreInputStream(request.getSourceReference()) - : getDirectAccessUrlInputStream(directUrl); + : getDirectAccessUrlInputStream(directUrl)); } catch (TransformException e) { @@ -574,4 +576,15 @@ public class TransformHandler } return customTransformer; } + + private void closeInputStreamWithoutException(InputStream inputStream) { + try + { + inputStream.close(); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } } diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/GenericTikaTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java similarity index 66% rename from engines/tika/src/main/java/org/alfresco/transform/tika/transformers/GenericTikaTransformer.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java index 56a2bc2e..a43f6e27 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/GenericTikaTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java @@ -26,8 +26,9 @@ */ package org.alfresco.transform.tika.transformers; +import org.alfresco.transform.base.CustomTransformer; +import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.logging.LogEntry; -import org.alfresco.transform.base.util.CustomTransformerFileAdaptor; import org.alfresco.transform.common.RequestParamMap; import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.parser.Parser; @@ -36,16 +37,18 @@ import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; -import java.io.File; -import java.util.ArrayList; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; import java.util.Map; +import java.util.Objects; import java.util.StringJoiner; import static java.lang.Boolean.parseBoolean; -public abstract class GenericTikaTransformer implements CustomTransformerFileAdaptor +public abstract class AbstractTikaTransformer implements CustomTransformer { - private static final Logger logger = LoggerFactory.getLogger(GenericTikaTransformer.class); + private static final Logger logger = LoggerFactory.getLogger(AbstractTikaTransformer.class); @Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}") boolean notExtractBookmarksTextDefault; @@ -67,9 +70,9 @@ public abstract class GenericTikaTransformer implements CustomTransformerFileAda } @Override - public void transform(String sourceMimetype, String targetMimetype, - Map transformOptions, File sourceFile, File targetFile) - throws Exception + public void transform(String sourceMimetype, InputStream inputStream, + String targetMimetype, OutputStream outputStream, + Map transformOptions, TransformManager transformManager) throws Exception { final boolean includeContents = parseBoolean( transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false")); @@ -80,56 +83,26 @@ public abstract class GenericTikaTransformer implements CustomTransformerFileAda { logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault); } - String transformerName = getTransformerName(); - call(sourceFile, targetFile, transformerName, + call(inputStream, outputStream, includeContents ? Tika.INCLUDE_CONTENTS : null, notExtractBookmarksText ? Tika.NOT_EXTRACT_BOOKMARKS_TEXT : null, Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding); } - void call(File sourceFile, File targetFile, String... args) + void call(InputStream inputStream, OutputStream outputStream, String... args) { Parser parser = getParser(); DocumentSelector documentSelector = getDocumentSelector(); - args = buildArgs(sourceFile, targetFile, args); - tika.transform(parser, documentSelector, args); + logArgs(args); + tika.transform(parser, documentSelector, inputStream, outputStream, args); } - private static String[] buildArgs(File sourceFile, File targetFile, String[] args) + private void logArgs(String[] args) { - ArrayList methodArgs = new ArrayList<>(args.length + 2); StringJoiner sj = new StringJoiner(" "); - for (String arg : args) - { - addArg(methodArgs, sj, arg); - } - - addFileArg(methodArgs, sj, sourceFile); - addFileArg(methodArgs, sj, targetFile); - + Arrays.stream(args) + .filter(Objects::nonNull) + .forEach(arg -> sj.add(arg)); LogEntry.setOptions(sj.toString()); - - return methodArgs.toArray(new String[0]); - } - - private static void addArg(ArrayList methodArgs, StringJoiner sj, String arg) - { - if (arg != null) - { - sj.add(arg); - methodArgs.add(arg); - } - } - - private static void addFileArg(ArrayList methodArgs, StringJoiner sj, File arg) - { - if (arg != null) - { - String path = arg.getAbsolutePath(); - int i = path.lastIndexOf('.'); - String ext = i == -1 ? "???" : path.substring(i + 1); - sj.add(ext); - methodArgs.add(path); - } } } diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/ArchiveTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/ArchiveTransformer.java index 68bbf943..457df502 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/ArchiveTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/ArchiveTransformer.java @@ -29,10 +29,8 @@ package org.alfresco.transform.tika.transformers; import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; -import javax.annotation.PostConstruct; - @Component -public class ArchiveTransformer extends GenericTikaTransformer +public class ArchiveTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OOXMLTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OOXMLTransformer.java index ffafdbd6..2a987a0f 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OOXMLTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OOXMLTransformer.java @@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class OOXMLTransformer extends GenericTikaTransformer +public class OOXMLTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OfficeTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OfficeTransformer.java index a9abdeb3..d613fede 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OfficeTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OfficeTransformer.java @@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class OfficeTransformer extends GenericTikaTransformer +public class OfficeTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OutlookMsgTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OutlookMsgTransformer.java index f4ffa72d..aefdfc92 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OutlookMsgTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/OutlookMsgTransformer.java @@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class OutlookMsgTransformer extends GenericTikaTransformer +public class OutlookMsgTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PdfBoxTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PdfBoxTransformer.java index 11c686da..9a5bd3ad 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PdfBoxTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PdfBoxTransformer.java @@ -31,7 +31,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class PdfBoxTransformer extends GenericTikaTransformer +public class PdfBoxTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PoiTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PoiTransformer.java index d26f98e4..7a228a0f 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PoiTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/PoiTransformer.java @@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class PoiTransformer extends GenericTikaTransformer +public class PoiTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TextMiningTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TextMiningTransformer.java index 7b611887..3c21e641 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TextMiningTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TextMiningTransformer.java @@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class TextMiningTransformer extends GenericTikaTransformer +public class TextMiningTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/Tika.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/Tika.java index e3064683..2fa22ad6 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/Tika.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/Tika.java @@ -65,6 +65,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.net.URL; import java.util.List; +import java.util.Objects; import java.util.regex.Pattern; import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML; @@ -158,72 +159,48 @@ public class Tika } // Extracts parameters form args - public void transform(Parser parser, DocumentSelector documentSelector, String[] args) + void transform(Parser parser, DocumentSelector documentSelector, InputStream inputStream, + OutputStream outputStream, String[] args) { - String transform = null; String targetMimetype = null; String targetEncoding = null; - String sourceFilename = null; - String targetFilename = null; Boolean includeContents = null; Boolean notExtractBookmarksText = null; for (String arg : args) { - if (arg.startsWith("--")) + if (Objects.isNull(arg)) { - if (INCLUDE_CONTENTS.startsWith(arg)) - { - getValue(arg, false, includeContents, INCLUDE_CONTENTS); - includeContents = true; - } - else if (arg.startsWith(TARGET_ENCODING)) - { - targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING); - } - else if (arg.startsWith(TARGET_MIMETYPE)) - { - targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE); - } - else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT)) - { - getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT); - notExtractBookmarksText = true; - } - else - { - throw new IllegalArgumentException("Unexpected argument " + arg); - } + // ignore + } + else if (arg.startsWith(INCLUDE_CONTENTS)) + { + getValue(arg, false, includeContents, INCLUDE_CONTENTS); + includeContents = true; + } + else if (arg.startsWith(TARGET_ENCODING)) + { + targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING); + } + else if (arg.startsWith(TARGET_MIMETYPE)) + { + targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE); + } + else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT)) + { + getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT); + notExtractBookmarksText = true; } else { - if (transform == null) - { - transform = arg; - } - else if (sourceFilename == null) - { - sourceFilename = arg; - } - else if (targetFilename == null) - { - targetFilename = arg; - } - else - { - throw new IllegalArgumentException("Unexpected argument " + arg); - } + throw new IllegalArgumentException("Unexpected argument " + arg); } } - if (targetFilename == null) - { - throw new IllegalArgumentException("Missing arguments"); - } includeContents = includeContents == null ? false : includeContents; notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText; - transform(parser, documentSelector, includeContents, notExtractBookmarksText, sourceFilename, - targetFilename, targetMimetype, targetEncoding); + transform(parser, documentSelector, includeContents, notExtractBookmarksText, inputStream, + outputStream, targetMimetype, targetEncoding); } private String getValue(String arg, boolean valueExpected, Object value, String optionName) @@ -247,20 +224,17 @@ public class Tika private void transform(Parser parser, DocumentSelector documentSelector, Boolean includeContents, Boolean notExtractBookmarksText, - String sourceFilename, - String targetFilename, String targetMimetype, String targetEncoding) + InputStream inputStream, + OutputStream outputStream, String targetMimetype, String targetEncoding) { - - try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename)); - OutputStream os = new FileOutputStream(targetFilename); - Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding))) + try (Writer ow = new BufferedWriter(new OutputStreamWriter(outputStream, targetEncoding))) { Metadata metadata = new Metadata(); ParseContext context = buildParseContext(documentSelector, includeContents, notExtractBookmarksText); ContentHandler handler = getContentHandler(targetMimetype, ow); - parser.parse(is, handler, metadata, context); + parser.parse(inputStream, handler, metadata, context); } catch (SAXException | TikaException | IOException e) { diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TikaAutoTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TikaAutoTransformer.java index 93135a5e..54c26a62 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TikaAutoTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/TikaAutoTransformer.java @@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @Component -public class TikaAutoTransformer extends GenericTikaTransformer +public class TikaAutoTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() diff --git a/engines/tika/src/test/java/org/alfresco/transform/tika/TikaControllerTest.java b/engines/tika/src/test/java/org/alfresco/transform/tika/TikaControllerTest.java index 62d26a3c..e452e35a 100644 --- a/engines/tika/src/test/java/org/alfresco/transform/tika/TikaControllerTest.java +++ b/engines/tika/src/test/java/org/alfresco/transform/tika/TikaControllerTest.java @@ -137,9 +137,6 @@ public class TikaControllerTest extends AbstractTransformControllerTest @Mock private RuntimeExec mockTransformCommand; - @Mock - private RuntimeExec mockCheckCommand; - private String targetEncoding = "UTF-8"; private String targetMimetype = MIMETYPE_TEXT_PLAIN; diff --git a/engines/tika/src/test/java/org/alfresco/transform/tika/transformers/GenericTikaTransformerTest.java b/engines/tika/src/test/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformerTest.java similarity index 62% rename from engines/tika/src/test/java/org/alfresco/transform/tika/transformers/GenericTikaTransformerTest.java rename to engines/tika/src/test/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformerTest.java index e56cfbe7..cbde8a72 100644 --- a/engines/tika/src/test/java/org/alfresco/transform/tika/transformers/GenericTikaTransformerTest.java +++ b/engines/tika/src/test/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformerTest.java @@ -37,19 +37,21 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import java.io.File; +import java.io.InputStream; +import java.io.OutputStream; import java.util.HashMap; import java.util.Map; +import org.alfresco.transform.base.TransformManager; import org.apache.tika.parser.Parser; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; @ExtendWith(MockitoExtension.class) -public class GenericTikaTransformerTest +public class AbstractTikaTransformerTest { - private static class TikaTestTransformer extends GenericTikaTransformer + private static class TikaTestTransformer extends AbstractTikaTransformer { @Override protected Parser getParser() @@ -66,75 +68,71 @@ public class GenericTikaTransformerTest @Test public void testNotExtractBookmarkTextDefault() throws Exception { - GenericTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true)); - GenericTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false)); + AbstractTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true)); + AbstractTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false)); - File mockSourceFile = mock(File.class); - File mockTargetFile = mock(File.class); - String transformName = "transformName"; + InputStream mockInputStream = mock(InputStream.class); + OutputStream mockOutputStream = mock(OutputStream.class); + TransformManager mockTransformManager = mock(TransformManager.class); String sourceMimetype = "sourceMimetype"; String targetMimetype = "targetMimetype"; String defaultEncoding = "UTF-8"; // no need to continue execution passed here or check values as we're checking the correct params passed to this method later. - lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any(), any()); - lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any(), any()); + lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any()); + lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any()); Map transformOptions = new HashMap<>(); // use empty transformOptions to test defaults - executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, - mockSourceFile, mockTargetFile); - executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, - mockSourceFile, mockTargetFile); + executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); + executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); // when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT - verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, + verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); // when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT - verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, + verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); // use transforms with notExtractBookmarksText set to true clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse); transformOptions.put("notExtractBookmarksText", "true"); - executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, - mockSourceFile, mockTargetFile); - executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, - mockSourceFile, mockTargetFile); + executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); + executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); // both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT - verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, + verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); - verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, + verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); // use transforms with notExtractBookmarksText set to false clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse); transformOptions.replace("notExtractBookmarksText", "true", "false"); - executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); - executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); + executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); + executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); // both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT - verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, + verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, null, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); - verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, + verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); // useful set of pdfbox transformOptions just to be safe clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse); transformOptions.put("targetEncoding", "anyEncoding"); - executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); - executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); + executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); + executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager); // both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT but the encoding will change - verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, + verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, null, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding"); - verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, + verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding"); } }