mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-05-19 17:14:47 +00:00
MNT-23047: exifTool command defined as properties and dependent on the running OS (#655)
* MNT-23047: exifTool command defined as properties and dependent on the running OS
This commit is contained in:
parent
514d03f81a
commit
6849854f4e
@ -20,4 +20,7 @@ transform:
|
|||||||
config: ${IMAGEMAGICK_CONFIG:}
|
config: ${IMAGEMAGICK_CONFIG:}
|
||||||
tika:
|
tika:
|
||||||
pdfBox:
|
pdfBox:
|
||||||
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
||||||
|
exifTool:
|
||||||
|
windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||||
|
unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.tika.metadata.extractors;
|
package org.alfresco.transform.tika.metadata.extractors;
|
||||||
|
|
||||||
|
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||||
import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor;
|
import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor;
|
||||||
import org.alfresco.transform.tika.parsers.ExifToolParser;
|
import org.alfresco.transform.tika.parsers.ExifToolParser;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
@ -55,17 +56,18 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor
|
|||||||
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
|
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
|
||||||
|
|
||||||
private ExifToolParser parser;
|
private ExifToolParser parser;
|
||||||
|
private RuntimeExec exifRuntimeExec;
|
||||||
|
|
||||||
public IPTCMetadataExtractor()
|
public IPTCMetadataExtractor(RuntimeExec exifRuntimeExec) {
|
||||||
{
|
|
||||||
super(EXTRACTOR, logger);
|
super(EXTRACTOR, logger);
|
||||||
|
this.exifRuntimeExec = exifRuntimeExec;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Parser getParser()
|
protected Parser getParser()
|
||||||
{
|
{
|
||||||
if (this.parser == null) {
|
if (this.parser == null) {
|
||||||
this.parser = new ExifToolParser();
|
this.parser = new ExifToolParser(exifRuntimeExec);
|
||||||
}
|
}
|
||||||
return this.parser;
|
return this.parser;
|
||||||
}
|
}
|
||||||
|
@ -44,6 +44,7 @@ import java.util.Map;
|
|||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.io.output.NullOutputStream;
|
import org.apache.commons.io.output.NullOutputStream;
|
||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
@ -76,13 +77,33 @@ public class ExifToolParser extends ExternalParser {
|
|||||||
private String separator;
|
private String separator;
|
||||||
|
|
||||||
public ExifToolParser() {
|
public ExifToolParser() {
|
||||||
|
this(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExifToolParser(RuntimeExec exifRuntimeExec) {
|
||||||
super();
|
super();
|
||||||
try {
|
try {
|
||||||
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
|
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
|
||||||
// if ExifTool is not installed then no parsers are returned
|
// if ExifTool is not installed then no parsers are returned
|
||||||
if (eParsers.size() > 0) {
|
if (eParsers.size() > 0) {
|
||||||
ExternalParser eParser = eParsers.get(0);
|
ExternalParser eParser = eParsers.get(0);
|
||||||
this.setCommand(eParser.getCommand());
|
|
||||||
|
String[] commandToBeExecuted;
|
||||||
|
if (exifRuntimeExec==null) {
|
||||||
|
logger.debug("Command to be executed determined from Tika ExternalParser");
|
||||||
|
commandToBeExecuted = eParser.getCommand();
|
||||||
|
} else {
|
||||||
|
logger.debug("Command to be executed determined from RuntimeExec");
|
||||||
|
commandToBeExecuted = exifRuntimeExec.getCommand();
|
||||||
|
}
|
||||||
|
if (commandToBeExecuted==null || commandToBeExecuted.length==0) {
|
||||||
|
commandToBeExecuted = eParser.getCommand();
|
||||||
|
}
|
||||||
|
|
||||||
|
String commandToBeExecutedAsString = String.join( " ", commandToBeExecuted);
|
||||||
|
logger.debug("Command to be executed: " + commandToBeExecutedAsString );
|
||||||
|
|
||||||
|
this.setCommand(commandToBeExecutedAsString);
|
||||||
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
|
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
|
||||||
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
|
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
|
||||||
this.setSupportedTypes(eParser.getSupportedTypes());
|
this.setSupportedTypes(eParser.getSupportedTypes());
|
||||||
@ -153,9 +174,11 @@ public class ExifToolParser extends ExternalParser {
|
|||||||
TemporaryResources tmp = new TemporaryResources();
|
TemporaryResources tmp = new TemporaryResources();
|
||||||
try {
|
try {
|
||||||
TikaInputStream tis = TikaInputStream.get(stream, tmp);
|
TikaInputStream tis = TikaInputStream.get(stream, tmp);
|
||||||
|
|
||||||
if (this.getSupportedTypes().contains(mediaType)) {
|
if (this.getSupportedTypes().contains(mediaType)) {
|
||||||
parse(tis, xhtml, metadata, tmp);
|
parse(tis, xhtml, metadata, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
|
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
|
||||||
case MIMETYPE_IMAGE_JPEG:
|
case MIMETYPE_IMAGE_JPEG:
|
||||||
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
|
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
|
||||||
@ -299,6 +322,7 @@ public class ExifToolParser extends ExternalParser {
|
|||||||
try {
|
try {
|
||||||
IOUtils.copy(stream, stdin);
|
IOUtils.copy(stream, stdin);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
logger.error( e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -306,6 +330,7 @@ public class ExifToolParser extends ExternalParser {
|
|||||||
try {
|
try {
|
||||||
t.join();
|
t.join();
|
||||||
} catch (InterruptedException ignore) {
|
} catch (InterruptedException ignore) {
|
||||||
|
logger.error(ignore.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ package org.alfresco.transform.tika.transformers;
|
|||||||
|
|
||||||
import org.alfresco.transform.base.CustomTransformer;
|
import org.alfresco.transform.base.CustomTransformer;
|
||||||
import org.alfresco.transform.base.TransformManager;
|
import org.alfresco.transform.base.TransformManager;
|
||||||
|
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||||
import org.alfresco.transform.base.logging.LogEntry;
|
import org.alfresco.transform.base.logging.LogEntry;
|
||||||
import org.alfresco.transform.common.RequestParamMap;
|
import org.alfresco.transform.common.RequestParamMap;
|
||||||
import org.apache.tika.extractor.DocumentSelector;
|
import org.apache.tika.extractor.DocumentSelector;
|
||||||
@ -36,10 +37,12 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.StringJoiner;
|
import java.util.StringJoiner;
|
||||||
@ -52,11 +55,27 @@ public abstract class AbstractTikaTransformer implements CustomTransformer
|
|||||||
|
|
||||||
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
|
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
|
||||||
boolean notExtractBookmarksTextDefault;
|
boolean notExtractBookmarksTextDefault;
|
||||||
|
@Value("${transform.core.tika.exifTool.windowsOS}")
|
||||||
|
private String exifToolCommandOnWindows;
|
||||||
|
@Value("${transform.core.tika.exifTool.unixOS}")
|
||||||
|
private String exifToolCommandOnUnix;
|
||||||
@Autowired
|
@Autowired
|
||||||
protected Tika tika;
|
protected Tika tika;
|
||||||
|
|
||||||
protected abstract Parser getParser();
|
protected abstract Parser getParser();
|
||||||
|
|
||||||
|
@Bean("exifTool")
|
||||||
|
public RuntimeExec exifRuntimeExec()
|
||||||
|
{
|
||||||
|
RuntimeExec runtimeExec = new RuntimeExec();
|
||||||
|
Map<String, String[]> commandPerOS = new HashMap<>();
|
||||||
|
commandPerOS.put("[wW]in.*", exifToolCommandOnWindows.split(" "));
|
||||||
|
commandPerOS.put("*", exifToolCommandOnUnix.split(" "));
|
||||||
|
runtimeExec.setCommandsAndArguments(commandPerOS);
|
||||||
|
|
||||||
|
return runtimeExec;
|
||||||
|
}
|
||||||
|
|
||||||
protected DocumentSelector getDocumentSelector()
|
protected DocumentSelector getDocumentSelector()
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
|
@ -4,4 +4,7 @@ transform:
|
|||||||
core:
|
core:
|
||||||
tika:
|
tika:
|
||||||
pdfBox:
|
pdfBox:
|
||||||
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}
|
||||||
|
exifTool:
|
||||||
|
windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||||
|
unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
|
||||||
|
@ -32,7 +32,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
|||||||
|
|
||||||
public class IPTCMetadataExtractorTest
|
public class IPTCMetadataExtractorTest
|
||||||
{
|
{
|
||||||
IPTCMetadataExtractor extractor = new IPTCMetadataExtractor();
|
IPTCMetadataExtractor extractor = new IPTCMetadataExtractor(null);
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIptcToIso8601DateStrings() {
|
public void testIptcToIso8601DateStrings() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user