diff --git a/engines/aio/src/main/resources/application-default.yaml b/engines/aio/src/main/resources/application-default.yaml index a57aa716..51e31f8d 100644 --- a/engines/aio/src/main/resources/application-default.yaml +++ b/engines/aio/src/main/resources/application-default.yaml @@ -20,4 +20,7 @@ transform: config: ${IMAGEMAGICK_CONFIG:} tika: pdfBox: - notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false} \ No newline at end of file + notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false} + exifTool: + windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' + unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java index 0384ba9a..6d1f12eb 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java @@ -26,6 +26,7 @@ */ package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.base.executors.RuntimeExec; import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.alfresco.transform.tika.parsers.ExifToolParser; import org.apache.commons.lang3.StringUtils; @@ -55,17 +56,18 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})"); private ExifToolParser parser; + private RuntimeExec exifRuntimeExec; - public IPTCMetadataExtractor() - { + public IPTCMetadataExtractor(RuntimeExec exifRuntimeExec) { super(EXTRACTOR, logger); + this.exifRuntimeExec = exifRuntimeExec; } @Override protected Parser getParser() { if (this.parser == null) { - this.parser = new ExifToolParser(); + this.parser = new ExifToolParser(exifRuntimeExec); } return this.parser; } diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/parsers/ExifToolParser.java b/engines/tika/src/main/java/org/alfresco/transform/tika/parsers/ExifToolParser.java index 660fd82a..1a6bb9b8 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/parsers/ExifToolParser.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/parsers/ExifToolParser.java @@ -44,6 +44,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.alfresco.transform.base.executors.RuntimeExec; import org.apache.commons.io.IOUtils; import org.apache.commons.io.output.NullOutputStream; import org.apache.tika.exception.TikaException; @@ -76,13 +77,33 @@ public class ExifToolParser extends ExternalParser { private String separator; public ExifToolParser() { + this(null); + } + + public ExifToolParser(RuntimeExec exifRuntimeExec) { super(); try { List eParsers = ExternalParsersFactory.create(getExternalParserConfigURL()); // if ExifTool is not installed then no parsers are returned if (eParsers.size() > 0) { ExternalParser eParser = eParsers.get(0); - this.setCommand(eParser.getCommand()); + + String[] commandToBeExecuted; + if (exifRuntimeExec==null) { + logger.debug("Command to be executed determined from Tika ExternalParser"); + commandToBeExecuted = eParser.getCommand(); + } else { + logger.debug("Command to be executed determined from RuntimeExec"); + commandToBeExecuted = exifRuntimeExec.getCommand(); + } + if (commandToBeExecuted==null || commandToBeExecuted.length==0) { + commandToBeExecuted = eParser.getCommand(); + } + + String commandToBeExecutedAsString = String.join( " ", commandToBeExecuted); + logger.debug("Command to be executed: " + commandToBeExecutedAsString ); + + this.setCommand(commandToBeExecutedAsString); this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer()); this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns()); this.setSupportedTypes(eParser.getSupportedTypes()); @@ -153,9 +174,11 @@ public class ExifToolParser extends ExternalParser { TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tis = TikaInputStream.get(stream, tmp); + if (this.getSupportedTypes().contains(mediaType)) { parse(tis, xhtml, metadata, tmp); - } + } + switch (mediaType.getType()+"/"+mediaType.getSubtype()) { case MIMETYPE_IMAGE_JPEG: parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType); @@ -299,6 +322,7 @@ public class ExifToolParser extends ExternalParser { try { IOUtils.copy(stream, stdin); } catch (IOException e) { + logger.error( e.getMessage()); } } }; @@ -306,6 +330,7 @@ public class ExifToolParser extends ExternalParser { try { t.join(); } catch (InterruptedException ignore) { + logger.error(ignore.getMessage()); } } diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java index a43f6e27..cdb6ad25 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/transformers/AbstractTikaTransformer.java @@ -28,6 +28,7 @@ package org.alfresco.transform.tika.transformers; import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.TransformManager; +import org.alfresco.transform.base.executors.RuntimeExec; import org.alfresco.transform.base.logging.LogEntry; import org.alfresco.transform.common.RequestParamMap; import org.apache.tika.extractor.DocumentSelector; @@ -36,10 +37,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; import java.io.InputStream; import java.io.OutputStream; import java.util.Arrays; +import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.StringJoiner; @@ -52,11 +55,27 @@ public abstract class AbstractTikaTransformer implements CustomTransformer @Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}") boolean notExtractBookmarksTextDefault; + @Value("${transform.core.tika.exifTool.windowsOS}") + private String exifToolCommandOnWindows; + @Value("${transform.core.tika.exifTool.unixOS}") + private String exifToolCommandOnUnix; @Autowired protected Tika tika; protected abstract Parser getParser(); + @Bean("exifTool") + public RuntimeExec exifRuntimeExec() + { + RuntimeExec runtimeExec = new RuntimeExec(); + Map commandPerOS = new HashMap<>(); + commandPerOS.put("[wW]in.*", exifToolCommandOnWindows.split(" ")); + commandPerOS.put("*", exifToolCommandOnUnix.split(" ")); + runtimeExec.setCommandsAndArguments(commandPerOS); + + return runtimeExec; + } + protected DocumentSelector getDocumentSelector() { return null; diff --git a/engines/tika/src/main/resources/application-default.yaml b/engines/tika/src/main/resources/application-default.yaml index d39257e9..7e280d41 100644 --- a/engines/tika/src/main/resources/application-default.yaml +++ b/engines/tika/src/main/resources/application-default.yaml @@ -4,4 +4,7 @@ transform: core: tika: pdfBox: - notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false} \ No newline at end of file + notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false} + exifTool: + windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' + unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' diff --git a/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java index f4e8cd86..b587aa59 100644 --- a/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java +++ b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java @@ -32,7 +32,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; public class IPTCMetadataExtractorTest { - IPTCMetadataExtractor extractor = new IPTCMetadataExtractor(); + IPTCMetadataExtractor extractor = new IPTCMetadataExtractor(null); @Test public void testIptcToIso8601DateStrings() {