mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-05-19 17:14:47 +00:00
MNT-23047: exifTool command defined as properties and dependent on ru… (#655)
* MNT-23047: exifTool command defined as properties and dependent on the running OS
This commit is contained in:
parent
514d03f81a
commit
6849854f4e
@ -20,4 +20,7 @@ transform:
|
||||
config: ${IMAGEMAGICK_CONFIG:}
|
||||
tika:
|
||||
pdfBox:
|
||||
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
||||
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
||||
exifTool:
|
||||
windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||
unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||
|
@ -26,6 +26,7 @@
|
||||
*/
|
||||
package org.alfresco.transform.tika.metadata.extractors;
|
||||
|
||||
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||
import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor;
|
||||
import org.alfresco.transform.tika.parsers.ExifToolParser;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
@ -55,17 +56,18 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor
|
||||
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
|
||||
|
||||
private ExifToolParser parser;
|
||||
private RuntimeExec exifRuntimeExec;
|
||||
|
||||
public IPTCMetadataExtractor()
|
||||
{
|
||||
public IPTCMetadataExtractor(RuntimeExec exifRuntimeExec) {
|
||||
super(EXTRACTOR, logger);
|
||||
this.exifRuntimeExec = exifRuntimeExec;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
{
|
||||
if (this.parser == null) {
|
||||
this.parser = new ExifToolParser();
|
||||
this.parser = new ExifToolParser(exifRuntimeExec);
|
||||
}
|
||||
return this.parser;
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.io.output.NullOutputStream;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
@ -76,13 +77,33 @@ public class ExifToolParser extends ExternalParser {
|
||||
private String separator;
|
||||
|
||||
public ExifToolParser() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
public ExifToolParser(RuntimeExec exifRuntimeExec) {
|
||||
super();
|
||||
try {
|
||||
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
|
||||
// if ExifTool is not installed then no parsers are returned
|
||||
if (eParsers.size() > 0) {
|
||||
ExternalParser eParser = eParsers.get(0);
|
||||
this.setCommand(eParser.getCommand());
|
||||
|
||||
String[] commandToBeExecuted;
|
||||
if (exifRuntimeExec==null) {
|
||||
logger.debug("Command to be executed determined from Tika ExternalParser");
|
||||
commandToBeExecuted = eParser.getCommand();
|
||||
} else {
|
||||
logger.debug("Command to be executed determined from RuntimeExec");
|
||||
commandToBeExecuted = exifRuntimeExec.getCommand();
|
||||
}
|
||||
if (commandToBeExecuted==null || commandToBeExecuted.length==0) {
|
||||
commandToBeExecuted = eParser.getCommand();
|
||||
}
|
||||
|
||||
String commandToBeExecutedAsString = String.join( " ", commandToBeExecuted);
|
||||
logger.debug("Command to be executed: " + commandToBeExecutedAsString );
|
||||
|
||||
this.setCommand(commandToBeExecutedAsString);
|
||||
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
|
||||
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
|
||||
this.setSupportedTypes(eParser.getSupportedTypes());
|
||||
@ -153,9 +174,11 @@ public class ExifToolParser extends ExternalParser {
|
||||
TemporaryResources tmp = new TemporaryResources();
|
||||
try {
|
||||
TikaInputStream tis = TikaInputStream.get(stream, tmp);
|
||||
|
||||
if (this.getSupportedTypes().contains(mediaType)) {
|
||||
parse(tis, xhtml, metadata, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
|
||||
case MIMETYPE_IMAGE_JPEG:
|
||||
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
|
||||
@ -299,6 +322,7 @@ public class ExifToolParser extends ExternalParser {
|
||||
try {
|
||||
IOUtils.copy(stream, stdin);
|
||||
} catch (IOException e) {
|
||||
logger.error( e.getMessage());
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -306,6 +330,7 @@ public class ExifToolParser extends ExternalParser {
|
||||
try {
|
||||
t.join();
|
||||
} catch (InterruptedException ignore) {
|
||||
logger.error(ignore.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,7 @@ package org.alfresco.transform.tika.transformers;
|
||||
|
||||
import org.alfresco.transform.base.CustomTransformer;
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||
import org.alfresco.transform.base.logging.LogEntry;
|
||||
import org.alfresco.transform.common.RequestParamMap;
|
||||
import org.apache.tika.extractor.DocumentSelector;
|
||||
@ -36,10 +37,12 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.StringJoiner;
|
||||
@ -52,11 +55,27 @@ public abstract class AbstractTikaTransformer implements CustomTransformer
|
||||
|
||||
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
|
||||
boolean notExtractBookmarksTextDefault;
|
||||
@Value("${transform.core.tika.exifTool.windowsOS}")
|
||||
private String exifToolCommandOnWindows;
|
||||
@Value("${transform.core.tika.exifTool.unixOS}")
|
||||
private String exifToolCommandOnUnix;
|
||||
@Autowired
|
||||
protected Tika tika;
|
||||
|
||||
protected abstract Parser getParser();
|
||||
|
||||
@Bean("exifTool")
|
||||
public RuntimeExec exifRuntimeExec()
|
||||
{
|
||||
RuntimeExec runtimeExec = new RuntimeExec();
|
||||
Map<String, String[]> commandPerOS = new HashMap<>();
|
||||
commandPerOS.put("[wW]in.*", exifToolCommandOnWindows.split(" "));
|
||||
commandPerOS.put("*", exifToolCommandOnUnix.split(" "));
|
||||
runtimeExec.setCommandsAndArguments(commandPerOS);
|
||||
|
||||
return runtimeExec;
|
||||
}
|
||||
|
||||
protected DocumentSelector getDocumentSelector()
|
||||
{
|
||||
return null;
|
||||
|
@ -4,4 +4,7 @@ transform:
|
||||
core:
|
||||
tika:
|
||||
pdfBox:
|
||||
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
||||
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
||||
exifTool:
|
||||
windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||
unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||
|
@ -32,7 +32,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
|
||||
public class IPTCMetadataExtractorTest
|
||||
{
|
||||
IPTCMetadataExtractor extractor = new IPTCMetadataExtractor();
|
||||
IPTCMetadataExtractor extractor = new IPTCMetadataExtractor(null);
|
||||
|
||||
@Test
|
||||
public void testIptcToIso8601DateStrings() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user