mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-08-14 17:58:27 +00:00
Save point: [skip ci]
* cleaning up TransformController - more to do * wire up all transforms
This commit is contained in:
@@ -26,9 +26,8 @@
|
||||
*/
|
||||
package org.alfresco.transform.tika.metadataExtractors;
|
||||
|
||||
import org.alfresco.transform.base.CustomTransformer;
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
|
||||
import org.alfresco.transform.common.TransformException;
|
||||
import org.apache.tika.embedder.Embedder;
|
||||
import org.apache.tika.extractor.DocumentSelector;
|
||||
import org.apache.tika.metadata.DublinCore;
|
||||
@@ -54,7 +53,6 @@ import org.xml.sax.Locator;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
@@ -84,7 +82,7 @@ import java.util.stream.Stream;
|
||||
* @author Nick Burch
|
||||
* @author adavis
|
||||
*/
|
||||
public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer
|
||||
public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor
|
||||
{
|
||||
protected static final String KEY_AUTHOR = "author";
|
||||
protected static final String KEY_TITLE = "title";
|
||||
@@ -310,22 +308,15 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
public void embedMetadata(String sourceMimetype, Map<String, String> transformOptions,
|
||||
String sourceEncoding, InputStream inputStream,
|
||||
String targetEncoding, OutputStream outputStream) throws Exception
|
||||
{
|
||||
// TODO
|
||||
throw new TransformException(500, "TODO embedMetadata");
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated The content repository's TikaPoweredMetadataExtracter provides no non-test implementations.
|
||||
* This code exists in case there are custom implementations that need to be converted to T-Engines.
|
||||
* It is simply a copy and paste from the content repository and has received limited testing.
|
||||
*/
|
||||
@Override
|
||||
public void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile) throws Exception
|
||||
public void embedMetadata(String sourceMimetype, InputStream inputStream,
|
||||
String targetMimetype, OutputStream outputStream,
|
||||
Map<String, String> transformOptions, TransformManager transformManager) throws Exception
|
||||
{
|
||||
Embedder embedder = getEmbedder();
|
||||
if (embedder == null)
|
||||
@@ -334,12 +325,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
|
||||
}
|
||||
|
||||
Metadata metadataToEmbed = getTikaMetadata(transformOptions);
|
||||
|
||||
try (InputStream inputStream = new FileInputStream(sourceFile);
|
||||
OutputStream outputStream = new FileOutputStream(targetFile))
|
||||
{
|
||||
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
|
||||
}
|
||||
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
|
||||
}
|
||||
|
||||
private Metadata getTikaMetadata(Map<String, String> transformOptions)
|
||||
|
@@ -26,10 +26,9 @@
|
||||
*/
|
||||
package org.alfresco.transform.tika.transformers;
|
||||
|
||||
import org.alfresco.transform.base.CustomTransformer;
|
||||
import org.alfresco.transform.base.logging.LogEntry;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.alfresco.transform.common.RequestParamMap;
|
||||
import org.alfresco.transform.common.TransformException;
|
||||
import org.apache.tika.extractor.DocumentSelector;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.slf4j.Logger;
|
||||
@@ -38,21 +37,18 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
import static java.lang.Boolean.parseBoolean;
|
||||
|
||||
public abstract class GenericTikaTransformer implements CustomTransformer
|
||||
public abstract class GenericTikaTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(GenericTikaTransformer.class);
|
||||
|
||||
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
|
||||
boolean notExtractBookmarksTextDefault;
|
||||
|
||||
@Autowired
|
||||
protected Tika tika;
|
||||
|
||||
@@ -71,15 +67,7 @@ public abstract class GenericTikaTransformer implements CustomTransformer
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String sourceMimetype, String sourceEncoding, InputStream inputStream,
|
||||
String targetMimetype, String targetEncoding, OutputStream outputStream,
|
||||
Map<String, String> transformOptions) throws Exception
|
||||
{
|
||||
// TODO
|
||||
throw new TransformException(500, "TODO GenericTikaTransformer transform with InputStreams");
|
||||
}
|
||||
|
||||
public void transform(String transformName, String sourceMimetype, String targetMimetype,
|
||||
public void transform(String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions, File sourceFile, File targetFile)
|
||||
throws Exception
|
||||
{
|
||||
@@ -92,7 +80,8 @@ public abstract class GenericTikaTransformer implements CustomTransformer
|
||||
{
|
||||
logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault);
|
||||
}
|
||||
call(sourceFile, targetFile, transformName,
|
||||
String transformerName = getTransformerName();
|
||||
call(sourceFile, targetFile, transformerName,
|
||||
includeContents ? Tika.INCLUDE_CONTENTS : null,
|
||||
notExtractBookmarksText ? Tika.NOT_EXTRACT_BOOKMARKS_TEXT : null,
|
||||
Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding);
|
||||
|
@@ -49,7 +49,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class GenericTikaTransformerTest
|
||||
{
|
||||
private class TikaTestTransformer extends GenericTikaTransformer
|
||||
private static class TikaTestTransformer extends GenericTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
@@ -61,7 +61,7 @@ public class GenericTikaTransformerTest
|
||||
{
|
||||
this.notExtractBookmarksTextDefault = notExtractBookmarksTextDefault;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNotExtractBookmarkTextDefault() throws Exception
|
||||
@@ -83,9 +83,9 @@ public class GenericTikaTransformerTest
|
||||
Map<String, String> transformOptions = new HashMap<>();
|
||||
|
||||
// use empty transformOptions to test defaults
|
||||
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
|
||||
// when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT
|
||||
@@ -99,9 +99,9 @@ public class GenericTikaTransformerTest
|
||||
// use transforms with notExtractBookmarksText set to true
|
||||
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
|
||||
transformOptions.put("notExtractBookmarksText", "true");
|
||||
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
|
||||
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
|
||||
@@ -114,8 +114,8 @@ public class GenericTikaTransformerTest
|
||||
// use transforms with notExtractBookmarksText set to false
|
||||
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
|
||||
transformOptions.replace("notExtractBookmarksText", "true", "false");
|
||||
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
|
||||
// with notExtractBookmarksText explicitly false, neither call should have NOT_EXTRACT_BOOKMARKS_TEXT
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
@@ -124,11 +124,11 @@ public class GenericTikaTransformerTest
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
// use full set of pdfbox transformOptions just to be safe
|
||||
// use full set of pdfbox transformOptions just to be safe
|
||||
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
|
||||
transformOptions.put("targetEncoding", "anyEncoding");
|
||||
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
|
||||
// neither call should have NOT_EXTRACT_BOOKMARKS_TEXT, but the encoding will change
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
|
Reference in New Issue
Block a user