Save point: [skip ci]

* Tika extractors no longer use Files
This commit is contained in:
alandavis
2022-07-14 08:51:09 +01:00
parent 57416f5949
commit 3af9d70667
13 changed files with 159 additions and 206 deletions

View File

@@ -56,6 +56,7 @@ import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBo
import javax.annotation.PostConstruct; import javax.annotation.PostConstruct;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
@@ -187,17 +188,14 @@ public class TransformHandler
} }
probeTestTransform.incrementTransformerCount(); probeTestTransform.incrementTransformerCount();
// Obtain the source
final String directUrl = requestParameters.getOrDefault(DIRECT_ACCESS_URL, ""); final String directUrl = requestParameters.getOrDefault(DIRECT_ACCESS_URL, "");
InputStream inputStream = directUrl.isBlank() InputStream inputStream = new BufferedInputStream(directUrl.isBlank()
? FileManager.getMultipartFileInputStream(sourceMultipartFile) ? FileManager.getMultipartFileInputStream(sourceMultipartFile)
: getDirectAccessUrlInputStream(directUrl); : getDirectAccessUrlInputStream(directUrl));
long sourceSizeInBytes = -1L; // TODO pass in t-options or just ignore for http request as the repo will have checked. long sourceSizeInBytes = -1L; // TODO pass in t-options or just ignore for http request as the repo will have checked.
Map<String, String> transformOptions = getTransformOptions(requestParameters); Map<String, String> transformOptions = getTransformOptions(requestParameters);
String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions); String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions);
CustomTransformer customTransformer = getCustomTransformer(transformName); CustomTransformer customTransformer = getCustomTransformer(transformName);
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
String targetEncoding = transformOptions.get(TARGET_ENCODING); // TODO not normally set
String reference = "e"+httpRequestCount.getAndIncrement(); String reference = "e"+httpRequestCount.getAndIncrement();
transformerDebug.pushTransform(reference, sourceMimetype, targetMimetype, sourceSizeInBytes, transformName); transformerDebug.pushTransform(reference, sourceMimetype, targetMimetype, sourceSizeInBytes, transformName);
transformerDebug.logOptions(reference, requestParameters); transformerDebug.logOptions(reference, requestParameters);
@@ -254,72 +252,76 @@ public class TransformHandler
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
} }
String targetMimetype = request.getTargetMediaType();
String sourceMimetype = request.getSourceMediaType();
File targetFile = createTargetFile(null, sourceMimetype, targetMimetype);
transformerDebug.pushTransform(request);
try try
{ {
OutputStreamLengthRecorder outputStream = String targetMimetype = request.getTargetMediaType();
new OutputStreamLengthRecorder(new BufferedOutputStream(new FileOutputStream(targetFile))); String sourceMimetype = request.getSourceMediaType();
File targetFile = createTargetFile(null, sourceMimetype, targetMimetype);
transformerDebug.pushTransform(request);
long sourceSizeInBytes = request.getSourceSize(); try (OutputStreamLengthRecorder outputStream = new OutputStreamLengthRecorder(new BufferedOutputStream(
Map<String, String> transformOptions = getTransformOptions(request.getTransformRequestOptions()); new FileOutputStream(targetFile))))
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
String targetEncoding = transformOptions.get(TARGET_ENCODING); // TODO not normally set
transformerDebug.logOptions(request);
String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions);
CustomTransformer customTransformer = getCustomTransformer(transformName);
TransformManagerImpl transformManager = TransformManagerImpl.builder()
.withSourceMimetype(sourceMimetype)
.withTargetMimetype(targetMimetype)
.withInputStream(inputStream)
.withOutputStream(outputStream)
.withTargetFile(targetFile)
.build();
customTransformer.transform(sourceMimetype, inputStream,
targetMimetype, outputStream, transformOptions, transformManager);
transformManager.ifUsedCopyTargetFileToOutputStream();
reply.getInternalContext().setCurrentSourceSize(outputStream.getLength());
if (saveTargetFileInSharedFileStore(targetFile, reply) == false)
{ {
long sourceSizeInBytes = request.getSourceSize();
Map<String, String> transformOptions = getTransformOptions(request.getTransformRequestOptions());
transformerDebug.logOptions(request);
String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions);
CustomTransformer customTransformer = getCustomTransformer(transformName);
TransformManagerImpl transformManager = TransformManagerImpl.builder()
.withSourceMimetype(sourceMimetype)
.withTargetMimetype(targetMimetype)
.withInputStream(inputStream)
.withOutputStream(outputStream)
.withTargetFile(targetFile)
.build();
customTransformer.transform(sourceMimetype, inputStream,
targetMimetype, outputStream, transformOptions, transformManager);
transformManager.ifUsedCopyTargetFileToOutputStream();
reply.getInternalContext().setCurrentSourceSize(outputStream.getLength());
if (saveTargetFileInSharedFileStore(targetFile, reply) == false)
{
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
}
transformManager.deleteSourceFileIfExists();
transformManager.deleteTargetFileIfExists();
probeTestTransform.recordTransformTime(System.currentTimeMillis()-start);
transformerDebug.popTransform(reply);
logger.trace("Sending successful {}, timeout {} ms", reply, timeout);
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus())); return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
} }
catch (TransformException e)
{
reply.setStatus(e.getStatusCode());
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
transformManager.deleteSourceFileIfExists(); transformerDebug.logFailure(reply);
transformManager.deleteTargetFileIfExists(); logger.trace("Failed to perform transform (TransformException), sending " + reply, e);
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
}
catch (Exception e)
{
reply.setStatus(INTERNAL_SERVER_ERROR.value());
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
probeTestTransform.recordTransformTime(System.currentTimeMillis()-start); transformerDebug.logFailure(reply);
transformerDebug.popTransform(reply); logger.trace("Failed to perform transform (Exception), sending " + reply, e);
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
logger.trace("Sending successful {}, timeout {} ms", reply, timeout); }
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
} }
catch (TransformException e) finally
{ {
reply.setStatus(e.getStatusCode()); closeInputStreamWithoutException(inputStream);
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
transformerDebug.logFailure(reply);
logger.trace("Failed to perform transform (TransformException), sending " + reply, e);
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
}
catch (Exception e)
{
reply.setStatus(INTERNAL_SERVER_ERROR.value());
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
transformerDebug.logFailure(reply);
logger.trace("Failed to perform transform (Exception), sending " + reply, e);
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
} }
} }
private boolean isTransformRequestValid(TransformRequest request, TransformReply reply) private boolean isTransformRequestValid(TransformRequest request, TransformReply reply)
{ {
final Errors errors = validateTransformRequest(request); final Errors errors = validateTransformRequest(request);
@@ -412,9 +414,9 @@ public class TransformHandler
InputStream inputStream = null; InputStream inputStream = null;
try try
{ {
inputStream = directUrl.isBlank() inputStream = new BufferedInputStream(directUrl.isBlank()
? getSharedFileStoreInputStream(request.getSourceReference()) ? getSharedFileStoreInputStream(request.getSourceReference())
: getDirectAccessUrlInputStream(directUrl); : getDirectAccessUrlInputStream(directUrl));
} }
catch (TransformException e) catch (TransformException e)
{ {
@@ -574,4 +576,15 @@ public class TransformHandler
} }
return customTransformer; return customTransformer;
} }
/**
 * Closes the supplied source stream on a best-effort basis.
 *
 * This is invoked from a {@code finally} block after a response has already been chosen, so it must
 * never throw: an exception raised here would replace the value being returned (or hide the exception
 * that is already propagating). A failure to close is therefore logged rather than rethrown.
 *
 * @param inputStream the stream to close; {@code null} is tolerated and ignored.
 */
private void closeInputStreamWithoutException(InputStream inputStream)
{
    if (inputStream == null)
    {
        // Nothing was opened, so there is nothing to release.
        return;
    }

    try
    {
        inputStream.close();
    }
    catch (IOException e)
    {
        // Deliberately not rethrown - see the method comment. Keep the cause in the log for diagnosis.
        logger.warn("Failed to close the source input stream", e);
    }
}
} }

View File

@@ -26,8 +26,9 @@
*/ */
package org.alfresco.transform.tika.transformers; package org.alfresco.transform.tika.transformers;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.logging.LogEntry; import org.alfresco.transform.base.logging.LogEntry;
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
import org.alfresco.transform.common.RequestParamMap; import org.alfresco.transform.common.RequestParamMap;
import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.parser.Parser; import org.apache.tika.parser.Parser;
@@ -36,16 +37,18 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import java.io.File; import java.io.InputStream;
import java.util.ArrayList; import java.io.OutputStream;
import java.util.Arrays;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.StringJoiner; import java.util.StringJoiner;
import static java.lang.Boolean.parseBoolean; import static java.lang.Boolean.parseBoolean;
public abstract class GenericTikaTransformer implements CustomTransformerFileAdaptor public abstract class AbstractTikaTransformer implements CustomTransformer
{ {
private static final Logger logger = LoggerFactory.getLogger(GenericTikaTransformer.class); private static final Logger logger = LoggerFactory.getLogger(AbstractTikaTransformer.class);
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}") @Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
boolean notExtractBookmarksTextDefault; boolean notExtractBookmarksTextDefault;
@@ -67,9 +70,9 @@ public abstract class GenericTikaTransformer implements CustomTransformerFileAda
} }
@Override @Override
public void transform(String sourceMimetype, String targetMimetype, public void transform(String sourceMimetype, InputStream inputStream,
Map<String, String> transformOptions, File sourceFile, File targetFile) String targetMimetype, OutputStream outputStream,
throws Exception Map<String, String> transformOptions, TransformManager transformManager) throws Exception
{ {
final boolean includeContents = parseBoolean( final boolean includeContents = parseBoolean(
transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false")); transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false"));
@@ -80,56 +83,26 @@ public abstract class GenericTikaTransformer implements CustomTransformerFileAda
{ {
logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault); logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault);
} }
String transformerName = getTransformerName(); call(inputStream, outputStream,
call(sourceFile, targetFile, transformerName,
includeContents ? Tika.INCLUDE_CONTENTS : null, includeContents ? Tika.INCLUDE_CONTENTS : null,
notExtractBookmarksText ? Tika.NOT_EXTRACT_BOOKMARKS_TEXT : null, notExtractBookmarksText ? Tika.NOT_EXTRACT_BOOKMARKS_TEXT : null,
Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding); Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding);
} }
void call(File sourceFile, File targetFile, String... args) void call(InputStream inputStream, OutputStream outputStream, String... args)
{ {
Parser parser = getParser(); Parser parser = getParser();
DocumentSelector documentSelector = getDocumentSelector(); DocumentSelector documentSelector = getDocumentSelector();
args = buildArgs(sourceFile, targetFile, args); logArgs(args);
tika.transform(parser, documentSelector, args); tika.transform(parser, documentSelector, inputStream, outputStream, args);
} }
private static String[] buildArgs(File sourceFile, File targetFile, String[] args) private void logArgs(String[] args)
{ {
ArrayList<String> methodArgs = new ArrayList<>(args.length + 2);
StringJoiner sj = new StringJoiner(" "); StringJoiner sj = new StringJoiner(" ");
for (String arg : args) Arrays.stream(args)
{ .filter(Objects::nonNull)
addArg(methodArgs, sj, arg); .forEach(arg -> sj.add(arg));
}
addFileArg(methodArgs, sj, sourceFile);
addFileArg(methodArgs, sj, targetFile);
LogEntry.setOptions(sj.toString()); LogEntry.setOptions(sj.toString());
return methodArgs.toArray(new String[0]);
}
private static void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
{
if (arg != null)
{
sj.add(arg);
methodArgs.add(arg);
}
}
private static void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
{
if (arg != null)
{
String path = arg.getAbsolutePath();
int i = path.lastIndexOf('.');
String ext = i == -1 ? "???" : path.substring(i + 1);
sj.add(ext);
methodArgs.add(path);
}
} }
} }

View File

@@ -29,10 +29,8 @@ package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser; import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
@Component @Component
public class ArchiveTransformer extends GenericTikaTransformer public class ArchiveTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class OOXMLTransformer extends GenericTikaTransformer public class OOXMLTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class OfficeTransformer extends GenericTikaTransformer public class OfficeTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class OutlookMsgTransformer extends GenericTikaTransformer public class OutlookMsgTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -31,7 +31,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class PdfBoxTransformer extends GenericTikaTransformer public class PdfBoxTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class PoiTransformer extends GenericTikaTransformer public class PoiTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class TextMiningTransformer extends GenericTikaTransformer public class TextMiningTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -65,6 +65,7 @@ import java.io.OutputStreamWriter;
import java.io.Writer; import java.io.Writer;
import java.net.URL; import java.net.URL;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML; import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
@@ -158,72 +159,48 @@ public class Tika
} }
// Extracts parameters from args // Extracts parameters from args
public void transform(Parser parser, DocumentSelector documentSelector, String[] args) void transform(Parser parser, DocumentSelector documentSelector, InputStream inputStream,
OutputStream outputStream, String[] args)
{ {
String transform = null;
String targetMimetype = null; String targetMimetype = null;
String targetEncoding = null; String targetEncoding = null;
String sourceFilename = null;
String targetFilename = null;
Boolean includeContents = null; Boolean includeContents = null;
Boolean notExtractBookmarksText = null; Boolean notExtractBookmarksText = null;
for (String arg : args) for (String arg : args)
{ {
if (arg.startsWith("--")) if (Objects.isNull(arg))
{ {
if (INCLUDE_CONTENTS.startsWith(arg)) // ignore
{ }
getValue(arg, false, includeContents, INCLUDE_CONTENTS); else if (arg.startsWith(INCLUDE_CONTENTS))
includeContents = true; {
} getValue(arg, false, includeContents, INCLUDE_CONTENTS);
else if (arg.startsWith(TARGET_ENCODING)) includeContents = true;
{ }
targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING); else if (arg.startsWith(TARGET_ENCODING))
} {
else if (arg.startsWith(TARGET_MIMETYPE)) targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
{ }
targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE); else if (arg.startsWith(TARGET_MIMETYPE))
} {
else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT)) targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
{ }
getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT); else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT))
notExtractBookmarksText = true; {
} getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
else notExtractBookmarksText = true;
{
throw new IllegalArgumentException("Unexpected argument " + arg);
}
} }
else else
{ {
if (transform == null) throw new IllegalArgumentException("Unexpected argument " + arg);
{
transform = arg;
}
else if (sourceFilename == null)
{
sourceFilename = arg;
}
else if (targetFilename == null)
{
targetFilename = arg;
}
else
{
throw new IllegalArgumentException("Unexpected argument " + arg);
}
} }
} }
if (targetFilename == null)
{
throw new IllegalArgumentException("Missing arguments");
}
includeContents = includeContents == null ? false : includeContents; includeContents = includeContents == null ? false : includeContents;
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText; notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
transform(parser, documentSelector, includeContents, notExtractBookmarksText, sourceFilename, transform(parser, documentSelector, includeContents, notExtractBookmarksText, inputStream,
targetFilename, targetMimetype, targetEncoding); outputStream, targetMimetype, targetEncoding);
} }
private String getValue(String arg, boolean valueExpected, Object value, String optionName) private String getValue(String arg, boolean valueExpected, Object value, String optionName)
@@ -247,20 +224,17 @@ public class Tika
private void transform(Parser parser, DocumentSelector documentSelector, private void transform(Parser parser, DocumentSelector documentSelector,
Boolean includeContents, Boolean includeContents,
Boolean notExtractBookmarksText, Boolean notExtractBookmarksText,
String sourceFilename, InputStream inputStream,
String targetFilename, String targetMimetype, String targetEncoding) OutputStream outputStream, String targetMimetype, String targetEncoding)
{ {
try (Writer ow = new BufferedWriter(new OutputStreamWriter(outputStream, targetEncoding)))
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
OutputStream os = new FileOutputStream(targetFilename);
Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
{ {
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
ParseContext context = buildParseContext(documentSelector, includeContents, ParseContext context = buildParseContext(documentSelector, includeContents,
notExtractBookmarksText); notExtractBookmarksText);
ContentHandler handler = getContentHandler(targetMimetype, ow); ContentHandler handler = getContentHandler(targetMimetype, ow);
parser.parse(is, handler, metadata, context); parser.parse(inputStream, handler, metadata, context);
} }
catch (SAXException | TikaException | IOException e) catch (SAXException | TikaException | IOException e)
{ {

View File

@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
public class TikaAutoTransformer extends GenericTikaTransformer public class TikaAutoTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()

View File

@@ -137,9 +137,6 @@ public class TikaControllerTest extends AbstractTransformControllerTest
@Mock @Mock
private RuntimeExec mockTransformCommand; private RuntimeExec mockTransformCommand;
@Mock
private RuntimeExec mockCheckCommand;
private String targetEncoding = "UTF-8"; private String targetEncoding = "UTF-8";
private String targetMimetype = MIMETYPE_TEXT_PLAIN; private String targetMimetype = MIMETYPE_TEXT_PLAIN;

View File

@@ -37,19 +37,21 @@ import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times; import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import java.io.File; import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.alfresco.transform.base.TransformManager;
import org.apache.tika.parser.Parser; import org.apache.tika.parser.Parser;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class GenericTikaTransformerTest public class AbstractTikaTransformerTest
{ {
private static class TikaTestTransformer extends GenericTikaTransformer private static class TikaTestTransformer extends AbstractTikaTransformer
{ {
@Override @Override
protected Parser getParser() protected Parser getParser()
@@ -66,75 +68,71 @@ public class GenericTikaTransformerTest
@Test @Test
public void testNotExtractBookmarkTextDefault() throws Exception public void testNotExtractBookmarkTextDefault() throws Exception
{ {
GenericTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true)); AbstractTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true));
GenericTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false)); AbstractTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false));
File mockSourceFile = mock(File.class); InputStream mockInputStream = mock(InputStream.class);
File mockTargetFile = mock(File.class); OutputStream mockOutputStream = mock(OutputStream.class);
String transformName = "transformName"; TransformManager mockTransformManager = mock(TransformManager.class);
String sourceMimetype = "sourceMimetype"; String sourceMimetype = "sourceMimetype";
String targetMimetype = "targetMimetype"; String targetMimetype = "targetMimetype";
String defaultEncoding = "UTF-8"; String defaultEncoding = "UTF-8";
// no need to continue execution past here or check values as we're checking the correct params passed to this method later. // no need to continue execution past here or check values as we're checking the correct params passed to this method later.
lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any(), any()); lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any());
lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any(), any()); lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any());
Map<String, String> transformOptions = new HashMap<>(); Map<String, String> transformOptions = new HashMap<>();
// use empty transformOptions to test defaults // use empty transformOptions to test defaults
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
mockSourceFile, mockTargetFile); executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
// when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT // when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT // when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use transforms with notExtractBookmarksText set to true // use transforms with notExtractBookmarksText set to true
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse); clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.put("notExtractBookmarksText", "true"); transformOptions.put("notExtractBookmarksText", "true");
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
mockSourceFile, mockTargetFile); executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT // both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use transforms with notExtractBookmarksText set to false // use transforms with notExtractBookmarksText set to false
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse); clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.replace("notExtractBookmarksText", "true", "false"); transformOptions.replace("notExtractBookmarksText", "true", "false");
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
// neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT // neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding); TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// useful set of pdfbox transformOptions just to be safe // useful set of pdfbox transformOptions just to be safe
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse); clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.put("targetEncoding", "anyEncoding"); transformOptions.put("targetEncoding", "anyEncoding");
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile); executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
// neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT, and the encoding will change // neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT, and the encoding will change
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding"); TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null, verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding"); TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
} }
} }