diff --git a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java index 89580f20..d4b4ff86 100644 --- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java +++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java @@ -245,19 +245,21 @@ public class MiscControllerTest extends AbstractTransformerControllerTest public void testExtractMetadataRFC822() throws Exception { String expected = - "{\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop \"," + - "\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\"," + - "\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop \"," + - "\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000," + - "\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop \"," + - "\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\"," + - "\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"," + - "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," + - "\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop \"," + - "\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000," + - "\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\"," + - "\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop \"," + - "\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop \"}"; + "{"+ + "\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop \","+ + 
"\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop \","+ + "\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\","+ + "\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop \","+ + "\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000,"+ + "\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\","+ + "\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\","+ + "\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000,"+ + "\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop \","+ + "\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop \","+ + "\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\","+ + "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\","+ + "\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop \""+ + "}"; MvcResult result = sendRequest("eml", null, MIMETYPE_RFC822, @@ -287,9 +289,9 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "\"messageFrom\":[" + "\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n"; String expected = - "{\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," + + "{\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\","+ "\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop \"," + - "\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"}"; + "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"}"; MvcResult result = sendRequest("eml", null, MIMETYPE_RFC822, diff --git 
a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/executors/TikaJavaExecutor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/executors/TikaJavaExecutor.java index 9817cc83..68075023 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/executors/TikaJavaExecutor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/executors/TikaJavaExecutor.java @@ -38,6 +38,7 @@ import org.alfresco.transformer.metadataExtractors.PdfBoxMetadataExtractor; import org.alfresco.transformer.metadataExtractors.PoiMetadataExtractor; import org.alfresco.transformer.metadataExtractors.TikaAudioMetadataExtractor; import org.alfresco.transformer.metadataExtractors.TikaAutoMetadataExtractor; +import org.alfresco.transformer.metadataExtractors.IPTCMetadataExtractor; import org.alfresco.transformer.util.RequestParamMap; import org.apache.tika.exception.TikaException; import org.slf4j.LoggerFactory; @@ -80,6 +81,7 @@ public class TikaJavaExecutor implements JavaExecutor .put("PoiMetadataExtractor", new PoiMetadataExtractor()) .put("TikaAudioMetadataExtractor", new TikaAudioMetadataExtractor()) .put("TikaAutoMetadataExtractor", new TikaAutoMetadataExtractor()) + .put("IPTCMetadataExtractor", new IPTCMetadataExtractor()) .build(); private final Map metadataEmbedder = ImmutableMap .builder() diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/IPTCMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/IPTCMetadataExtractor.java new file mode 100644 index 00000000..02f6f0fb --- /dev/null +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/IPTCMetadataExtractor.java @@ -0,0 +1,58 @@ +/* + * #%L + * Alfresco Transform Core + * %% + 
* Copyright (C) 2005 - 2021 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.transformer.metadataExtractors; + +import java.io.IOException; + +import org.alfresco.transform.exceptions.TransformException; +import org.alfresco.transformer.tika.parsers.ExifToolParser; +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.Parser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor +{ + + private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class); + + public IPTCMetadataExtractor() + { + super(logger); + } + + @Override + protected Parser getParser() { + try { + return new ExifToolParser(); + } catch (IOException | TikaException e) { + logger.error(e.getMessage(), e); + throw new TransformException(500, "Error creating IPTC parser"); + } + } + +} diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java 
b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java new file mode 100644 index 00000000..90442690 --- /dev/null +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java @@ -0,0 +1,286 @@ +/* + * #%L + * Alfresco Transform Core + * %% + * Copyright (C) 2005 - 2021 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * - + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * - + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * - + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * - + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ +package org.alfresco.transformer.tika.parsers; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.Reader; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.tika.metadata.Metadata; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.IOUtils; +import org.apache.tika.io.NullOutputStream; +import org.apache.tika.io.TemporaryResources; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.external.ExternalParser; +import org.apache.tika.parser.external.ExternalParsersFactory; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import static java.nio.charset.StandardCharsets.UTF_8; + +public class ExifToolParser extends ExternalParser { + + private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml"; + + public ExifToolParser() throws IOException, TikaException { + super(); + ExternalParser eParser = ExternalParsersFactory.create(getExternalParserConfigURL()).get(0); + this.setCommand(eParser.getCommand()); + this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer()); + this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns()); + this.setSupportedTypes(eParser.getSupportedTypes()); + } + + private URL getExternalParserConfigURL(){ + ClassLoader classLoader = ExifToolParser.class.getClassLoader(); + return classLoader.getResource(EXIFTOOL_PARSER_CONFIG); + } + + /** + * Adapted from {@link org.apache.tika.parser.external.ExternalParser} + * due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation.

+ * Executes the configured external command and passes the given document + * stream as a simple XHTML document to the given SAX content handler. + * Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)} + * has been called to set patterns. A command that cannot be started is reported as a TikaException. + */ + public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + + TemporaryResources tmp = new TemporaryResources(); + try { + parse(TikaInputStream.get(stream, tmp), xhtml, metadata, tmp); + } finally { + tmp.dispose(); + } + } + + private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata metadata, TemporaryResources tmp) + throws IOException, SAXException, TikaException { + boolean inputToStdIn = true; + boolean outputFromStdOut = true; + boolean hasPatterns = (getMetadataExtractionPatterns() != null && !getMetadataExtractionPatterns().isEmpty()); + + File output = null; + + // Build our getCommand() + String[] cmd; + if (getCommand().length == 1) { + cmd = getCommand()[0].split(" "); + } else { + cmd = new String[getCommand().length]; + System.arraycopy(getCommand(), 0, cmd, 0, getCommand().length); + } + for (int i = 0; i < cmd.length; i++) { + if (cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) { + cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath()); + inputToStdIn = false; + } + if (cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) { + output = tmp.createTemporaryFile(); + outputFromStdOut = false; + cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath()); + } + } + + // Execute; a launch failure is rethrown so the code below never sees a null process + Process process = null; + try { + if (cmd.length == 1) { + process = Runtime.getRuntime().exec(cmd[0]); + } else { + process = Runtime.getRuntime().exec(cmd); + } + } catch (Exception e) { + throw new TikaException(String.format("Unable to start external command: %s", String.join(" ", cmd)), e); + } + + try { + if (inputToStdIn) { + sendInput(process, stream); + } else { + process.getOutputStream().close(); + }
+ + InputStream out = process.getInputStream(); + InputStream err = process.getErrorStream(); + + if (hasPatterns) { + + if (outputFromStdOut) { + extractOutput(out, xhtml); + } else { + extractMetadata(out, metadata); + } + } else { + ignoreStream(err); + + if (outputFromStdOut) { + extractOutput(out, xhtml); + } else { + ignoreStream(out); + } + } + } finally { + try { + process.waitFor(); + } catch (InterruptedException ignore) { + } + } + + // Grab the output if we haven't already + if (!outputFromStdOut) { + extractOutput(new FileInputStream(output), xhtml); + } + } + + /** + * Adapted from {@link org.apache.tika.parser.external.ExternalParser}

+ * Copies the contents of the given stream, decoded as UTF-8, into a + * single {@code p} element of the given XHTML content handler, on the calling thread. + * The stream is closed once fully processed. + * + * @param stream stream to read; in this class, the command's standard output or its output file + * @param xhtml XHTML content handler + * @throws SAXException if the XHTML SAX events could not be handled + * @throws IOException if an input error occurred + */ + private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException { + try (Reader reader = new InputStreamReader(stream, UTF_8)) { + xhtml.startDocument(); + xhtml.startElement("p"); + char[] buffer = new char[1024]; + for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) { + xhtml.characters(buffer, 0, n); + } + xhtml.endElement("p"); + xhtml.endDocument(); + } + } + + /** + * Adapted from {@link org.apache.tika.parser.external.ExternalParser}

+ * Starts a thread that sends the contents of the given input stream + * to the standard input stream of the given process. Potential + * exceptions are ignored, and the standard input stream is closed + * once fully processed. Note that the given input stream is not + * closed by this method. Blocks until the copy has finished or the calling thread is interrupted. + * + * @param process process + * @param stream input stream + */ + private void sendInput(final Process process, final InputStream stream) { + Thread t = new Thread() { + public void run() { + // try-with-resources closes stdin, so the command sees end-of-input + try (OutputStream stdin = process.getOutputStream()) { + IOUtils.copy(stream, stdin); + } catch (IOException e) { + } + } + }; + t.start(); + try { + t.join(); + } catch (InterruptedException ignore) { + } + } + + /** + * Adapted from {@link org.apache.tika.parser.external.ExternalParser}

+ * Starts a thread that reads and discards the contents of the + * given stream, and waits for it to finish. Potential exceptions + * are ignored, and the stream is closed once fully processed. + * + * @param stream stream to drain and close + */ + private void ignoreStream(final InputStream stream) { + Thread t = new Thread() { + public void run() { + try { + IOUtils.copy(stream, new NullOutputStream()); + } catch (IOException e) { + } finally { + IOUtils.closeQuietly(stream); + } + } + }; + t.start(); + try { + t.join(); + } catch (InterruptedException ignore) { + } + } + + private void extractMetadata(final InputStream stream, final Metadata metadata) { + Thread t = new Thread() { + public void run() { + BufferedReader reader; + reader = new BufferedReader(new InputStreamReader(stream, UTF_8)); + try { + String line; + while ((line = reader.readLine()) != null) { + for (Pattern p : getMetadataExtractionPatterns().keySet()) { + Matcher m = p.matcher(line); + if (m.find()) { + if (getMetadataExtractionPatterns().get(p) != null + && !getMetadataExtractionPatterns().get(p).equals("")) { + metadata.add(getMetadataExtractionPatterns().get(p), m.group(1)); + } else { + metadata.add(m.group(1), m.group(2)); + } + } + } + } + } catch (IOException e) { + // Ignore + } finally { + IOUtils.closeQuietly(reader); + IOUtils.closeQuietly(stream); + } + } + }; + t.start(); + try { + t.join(); + } catch (InterruptedException ignore) { + } + } +} diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/IPTCMetadataExtractor_metadata_extract.properties b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/IPTCMetadataExtractor_metadata_extract.properties new file mode 100644 index 00000000..95e1eb96 --- /dev/null +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/IPTCMetadataExtractor_metadata_extract.properties @@ -0,0 +1,308 @@ +# +# IPTCMetadataExtracter - default mapping +# +# author: David Edwards + +# TODO Complete mappings (currently
copied from other files) + + # Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 +namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 +namespace.prefix.video=http://www.alfresco.org/model/video/1.0 +namespace.prefix.custom=MyCustomNameSpace + +# Dump of most namespaces returned by the exifTool for testing +# TODO ATS-900 +namespace.prefix.File=File +namespace.prefix.IFD0=IFD0 +namespace.prefix.ExifIFD=ExifIFD +namespace.prefix.IFD1=IFD1 +namespace.prefix.Photoshop=Photoshop +namespace.prefix.IPTC=IPTC +namespace.prefix.ICC-header=ICC-header +namespace.prefix.ICC_Profile=ICC_Profile +namespace.prefix.ICC-view=ICC-view +namespace.prefix.ICC-meas=ICC-meas +namespace.prefix.XMP-x=XMP-x +namespace.prefix.XMP-photoshop=XMP-photoshop +namespace.prefix.XMP-xmp=XMP-xmp +namespace.prefix.XMP-iptcCore=XMP-iptcCore +namespace.prefix.XMP-dc=XMP-dc +namespace.prefix.XMP-xmpRights=XMP-xmpRights +namespace.prefix.XMP-xmpMM=XMP-xmpMM +namespace.prefix.XMP-illustrator=XMP-illustrator +namespace.prefix.XMP-mediapro=XMP-mediapro +namespace.prefix.XMP-aux=XMP-aux +namespace.prefix.XMP-iptcExt=XMP-iptcExt +namespace.prefix.XMP-plus=XMP-plus +namespace.prefix.XMP-custom=XMP-custom +namespace.prefix.Adobe=Adobe +namespace.prefix.Composite=Composite + +# Core mappings +# There is overlap with the TikaAutoMetadataExtractor. These core mappings should still be included for backwards compatibility.
+# TODO ATS-900 +XMP-dc\:Creator=cm:author +XMP-dc\:Title=cm:title +XMP-dc\:Description=cm:description +created=cm:created + +# Dump of most namespaces returned by the exifTool for testing +# TODO ATS-900 +ExifTool\:ExifToolVersion=custom:ExifToolVersion +File\:FileType=File:FileType +File\:FileTypeExtension=File:FileTypeExtension +File\:MIMEType=File:MIMEType +File\:ExifByteOrder=File:ExifByteOrder +File\:CurrentIPTCDigest=File:CurrentIPTCDigest +File\:ImageWidth=File:ImageWidth +File\:ImageHeight=File:ImageHeight +File\:EncodingProcess=File:EncodingProcess +File\:BitsPerSample=File:BitsPerSample +File\:ColorComponents=File:ColorComponents +File\:YCbCrSubSampling=File:YCbCrSubSampling +IFD0\:PhotometricInterpretation=IFD0:PhotometricInterpretation +IFD0\:ImageDescription=IFD0:ImageDescription +IFD0\:Orientation=IFD0:Orientation +IFD0\:SamplesPerPixel=IFD0:SamplesPerPixel +IFD0\:XResolution=IFD0:XResolution +IFD0\:YResolution=IFD0:YResolution +IFD0\:ResolutionUnit=IFD0:ResolutionUnit +IFD0\:Software=IFD0:Software +IFD0\:ModifyDate=IFD0:ModifyDate +IFD0\:Artist=IFD0:Artist +IFD0\:Copyright=IFD0:Copyright +ExifIFD\:Make=ExifIFD:Make +ExifIFD\:Model=ExifIFD:Model +ExifIFD\:ExposureTime=ExifIFD:ExposureTime +ExifIFD\:FNumber=ExifIFD:FNumber +ExifIFD\:ExposureProgram=ExifIFD:ExposureProgram +ExifIFD\:ISO=ExifIFD:ISO +ExifIFD\:ExifVersion=ExifIFD:ExifVersion +ExifIFD\:DateTimeOriginal=ExifIFD:DateTimeOriginal +ExifIFD\:CreateDate=ExifIFD:CreateDate +ExifIFD\:ComponentsConfiguration=ExifIFD:ComponentsConfiguration +ExifIFD\:ShutterSpeedValue=ExifIFD:ShutterSpeedValue +ExifIFD\:ApertureValue=ExifIFD:ApertureValue +ExifIFD\:ExposureCompensation=ExifIFD:ExposureCompensation +ExifIFD\:MaxApertureValue=ExifIFD:MaxApertureValue +ExifIFD\:MeteringMode=ExifIFD:MeteringMode +ExifIFD\:Flash=ExifIFD:Flash +ExifIFD\:FocalLength=ExifIFD:FocalLength +ExifIFD\:SubSecTime=ExifIFD:SubSecTime +ExifIFD\:SubSecTimeOriginal=ExifIFD:SubSecTimeOriginal 
+ExifIFD\:SubSecTimeDigitized=ExifIFD:SubSecTimeDigitized +ExifIFD\:FlashpixVersion=ExifIFD:FlashpixVersion +ExifIFD\:ColorSpace=ExifIFD:ColorSpace +ExifIFD\:ExifImageWidth=ExifIFD:ExifImageWidth +ExifIFD\:ExifImageHeight=ExifIFD:ExifImageHeight +ExifIFD\:FocalPlaneXResolution=ExifIFD:FocalPlaneXResolution +ExifIFD\:FocalPlaneYResolution=ExifIFD:FocalPlaneYResolution +ExifIFD\:FocalPlaneResolutionUnit=ExifIFD:FocalPlaneResolutionUnit +ExifIFD\:CustomRendered=ExifIFD:CustomRendered +ExifIFD\:ExposureMode=ExifIFD:ExposureMode +ExifIFD\:WhiteBalance=ExifIFD:WhiteBalance +ExifIFD\:SceneCaptureType=ExifIFD:SceneCaptureType +ExifIFD\:SerialNumber=ExifIFD:SerialNumber +ExifIFD\:LensInfo=ExifIFD:LensInfo +ExifIFD\:LensModel=ExifIFD:LensModel +ExifIFD\:LensSerialNumber=ExifIFD:LensSerialNumber +IFD1\:Compression=IFD1:Compression +IFD1\:ThumbnailOffset=IFD1:ThumbnailOffset +IFD1\:ThumbnailLength=IFD1:ThumbnailLength +IFD1\:ThumbnailImage=IFD1:ThumbnailImage +IPTC\:CodedCharacterSet=IPTC:CodedCharacterSet +IPTC\:ApplicationRecordVersion=IPTC:ApplicationRecordVersion +IPTC\:Caption-Abstract=IPTC:Caption-Abstract +IPTC\:Writer-Editor=IPTC:Writer-Editor +IPTC\:SpecialInstructions=IPTC:SpecialInstructions +IPTC\:By-line=IPTC:By-line +IPTC\:By-lineTitle=IPTC:By-lineTitle +IPTC\:ObjectName=IPTC:ObjectName +IPTC\:TimeCreated=IPTC:TimeCreated +IPTC\:Sub-location=IPTC:Sub-location +IPTC\:Province-State=IPTC:Province-State +IPTC\:Country-PrimaryLocationName=IPTC:Country-PrimaryLocationName +IPTC\:Country-PrimaryLocationCode=IPTC:Country-PrimaryLocationCode +IPTC\:OriginalTransmissionReference=IPTC:OriginalTransmissionReference +IPTC\:Keywords=IPTC:Keywords +IPTC\:CopyrightNotice=IPTC:CopyrightNotice +IPTC\:FixtureIdentifier=IPTC:FixtureIdentifier +IPTC\:EditStatus=IPTC:EditStatus +IPTC\:Contact=IPTC:Contact +Photoshop\:IPTCDigest=Photoshop:IPTCDigest +Photoshop\:DisplayedUnitsX=Photoshop:DisplayedUnitsX +Photoshop\:DisplayedUnitsY=Photoshop:DisplayedUnitsY 
+Photoshop\:PrintStyle=Photoshop:PrintStyle +Photoshop\:PrintPosition=Photoshop:PrintPosition +Photoshop\:PrintScale=Photoshop:PrintScale +Photoshop\:GlobalAngle=Photoshop:GlobalAngle +Photoshop\:GlobalAltitude=Photoshop:GlobalAltitude +Photoshop\:CopyrightFlag=Photoshop:CopyrightFlag +Photoshop\:URL=Photoshop:URL +Photoshop\:URL_List=Photoshop:URL_List +Photoshop\:SlicesGroupName=Photoshop:SlicesGroupName +Photoshop\:NumSlices=Photoshop:NumSlices +Photoshop\:PixelAspectRatio=Photoshop:PixelAspectRatio +Photoshop\:PhotoshopThumbnail=Photoshop:PhotoshopThumbnail +Photoshop\:HasRealMergedData=Photoshop:HasRealMergedData +Photoshop\:WriterName=Photoshop:WriterName +Photoshop\:ReaderName=Photoshop:ReaderName +Photoshop\:PhotoshopQuality=Photoshop:PhotoshopQuality +Photoshop\:PhotoshopFormat=Photoshop:PhotoshopFormat +Photoshop\:ProgressiveScans=Photoshop:ProgressiveScans +XMP-x\:XMPToolkit=XMP-x:XMPToolkit +XMP-photoshop\:LegacyIPTCDigest=XMP-photoshop:LegacyIPTCDigest +XMP-photoshop\:AuthorsPosition=XMP-photoshop:AuthorsPosition +XMP-photoshop\:Headline=XMP-photoshop:Headline +XMP-photoshop\:CaptionWriter=XMP-photoshop:CaptionWriter +XMP-photoshop\:DateCreated=XMP-photoshop:DateCreated +XMP-photoshop\:City=XMP-photoshop:City +XMP-photoshop\:State=XMP-photoshop:State +XMP-photoshop\:Country=XMP-photoshop:Country +XMP-photoshop\:TransmissionReference=XMP-photoshop:TransmissionReference +XMP-photoshop\:Instructions=XMP-photoshop:Instructions +XMP-photoshop\:Credit=XMP-photoshop:Credit +XMP-photoshop\:Source=XMP-photoshop:Source +XMP-photoshop\:Category=XMP-photoshop:Category +XMP-photoshop\:ColorMode=XMP-photoshop:ColorMode +XMP-photoshop\:ICCProfileName=XMP-photoshop:ICCProfileName +XMP-photoshop\:SupplementalCategories=XMP-photoshop:SupplementalCategories +XMP-xmp\:CreatorTool=XMP-xmp:CreatorTool +XMP-xmp\:MetadataDate=XMP-xmp:MetadataDate +XMP-iptcCore\:IntellectualGenre=XMP-iptcCore:IntellectualGenre +XMP-iptcCore\:Location=XMP-iptcCore:Location 
+XMP-iptcCore\:CountryCode=XMP-iptcCore:CountryCode +XMP-iptcCore\:CreatorAddress=XMP-iptcCore:CreatorAddress +XMP-iptcCore\:CreatorCity=XMP-iptcCore:CreatorCity +XMP-iptcCore\:CreatorRegion=XMP-iptcCore:CreatorRegion +XMP-iptcCore\:CreatorPostalCode=XMP-iptcCore:CreatorPostalCode +XMP-iptcCore\:CreatorCountry=XMP-iptcCore:CreatorCountry +XMP-iptcCore\:CreatorWorkTelephone=XMP-iptcCore:CreatorWorkTelephone +XMP-iptcCore\:CreatorWorkEmail=XMP-iptcCore:CreatorWorkEmail +XMP-iptcCore\:CreatorWorkURL=XMP-iptcCore:CreatorWorkURL +XMP-iptcCore\:Scene=XMP-iptcCore:Scene +XMP-iptcCore\:SubjectCode=XMP-iptcCore:SubjectCode +XMP-dc\:Format=XMP-dc:Format +XMP-dc\:Title=XMP-dc:Title +XMP-dc\:Subject=XMP-dc:Subject +XMP-dc\:Creator=XMP-dc:Creator +XMP-dc\:Rights=XMP-dc:Rights +XMP-dc\:Description=XMP-dc:Description +XMP-xmpRights\:WebStatement=XMP-xmpRights:WebStatement +XMP-xmpRights\:Marked=XMP-xmpRights:Marked +XMP-xmpRights\:UsageTerms=XMP-xmpRights:UsageTerms +XMP-xmpMM\:DocumentID=XMP-xmpMM:DocumentID +XMP-xmpMM\:InstanceID=XMP-xmpMM:InstanceID +XMP-xmpMM\:OriginalDocumentID=XMP-xmpMM:OriginalDocumentID +XMP-xmpMM\:HistoryAction=XMP-xmpMM:HistoryAction +XMP-xmpMM\:HistoryInstanceID=XMP-xmpMM:HistoryInstanceID +XMP-xmpMM\:HistoryWhen=XMP-xmpMM:HistoryWhen +XMP-xmpMM\:HistorySoftwareAgent=XMP-xmpMM:HistorySoftwareAgent +XMP-xmpMM\:HistoryChanged=XMP-xmpMM:HistoryChanged +XMP-illustrator\:StartupProfile=XMP-illustrator:StartupProfile +XMP-mediapro\:Status=XMP-mediapro:Status +XMP-mediapro\:People=XMP-mediapro:People +XMP-aux\:Lens=XMP-aux:Lens +XMP-iptcExt\:AdditionalModelInformation=XMP-iptcExt:AdditionalModelInformation +XMP-iptcExt\:MaxAvailWidth=XMP-iptcExt:MaxAvailWidth +XMP-iptcExt\:MaxAvailHeight=XMP-iptcExt:MaxAvailHeight +XMP-iptcExt\:DigitalSourceType=XMP-iptcExt:DigitalSourceType +XMP-iptcExt\:LocationCreatedSublocation=XMP-iptcExt:LocationCreatedSublocation +XMP-iptcExt\:LocationCreatedCity=XMP-iptcExt:LocationCreatedCity 
+XMP-iptcExt\:LocationCreatedProvinceState=XMP-iptcExt:LocationCreatedProvinceState +XMP-iptcExt\:LocationCreatedCountryName=XMP-iptcExt:LocationCreatedCountryName +XMP-iptcExt\:LocationCreatedCountryCode=XMP-iptcExt:LocationCreatedCountryCode +XMP-iptcExt\:LocationCreatedWorldRegion=XMP-iptcExt:LocationCreatedWorldRegion +XMP-iptcExt\:Event=XMP-iptcExt:Event +XMP-iptcExt\:ModelAge=XMP-iptcExt:ModelAge +XMP-iptcExt\:ArtworkDateCreated=XMP-iptcExt:ArtworkDateCreated +XMP-iptcExt\:ArtworkSource=XMP-iptcExt:ArtworkSource +XMP-iptcExt\:ArtworkSourceInventoryNo=XMP-iptcExt:ArtworkSourceInventoryNo +XMP-iptcExt\:ArtworkCopyrightNotice=XMP-iptcExt:ArtworkCopyrightNotice +XMP-iptcExt\:ArtworkTitle=XMP-iptcExt:ArtworkTitle +XMP-iptcExt\:ArtworkCreator=XMP-iptcExt:ArtworkCreator +XMP-iptcExt\:RegistryOrganisationID=XMP-iptcExt:RegistryOrganisationID +XMP-iptcExt\:RegistryItemID=XMP-iptcExt:RegistryItemID +XMP-iptcExt\:OrganisationInImageName=XMP-iptcExt:OrganisationInImageName +XMP-iptcExt\:OrganisationInImageCode=XMP-iptcExt:OrganisationInImageCode +XMP-iptcExt\:PersonInImage=XMP-iptcExt:PersonInImage +XMP-iptcExt\:LocationShownSublocation=XMP-iptcExt:LocationShownSublocation +XMP-iptcExt\:LocationShownCity=XMP-iptcExt:LocationShownCity +XMP-iptcExt\:LocationShownProvinceState=XMP-iptcExt:LocationShownProvinceState +XMP-iptcExt\:LocationShownCountryName=XMP-iptcExt:LocationShownCountryName +XMP-iptcExt\:LocationShownCountryCode=XMP-iptcExt:LocationShownCountryCode +XMP-iptcExt\:LocationShownWorldRegion=XMP-iptcExt:LocationShownWorldRegion +XMP-plus\:PropertyReleaseStatus=XMP-plus:PropertyReleaseStatus +XMP-plus\:ImageSupplierImageID=XMP-plus:ImageSupplierImageID +XMP-plus\:MinorModelAgeDisclosure=XMP-plus:MinorModelAgeDisclosure +XMP-plus\:ModelReleaseStatus=XMP-plus:ModelReleaseStatus +XMP-plus\:PLUSVersion=XMP-plus:PLUSVersion +XMP-plus\:ImageSupplierName=XMP-plus:ImageSupplierName +XMP-plus\:ImageSupplierID=XMP-plus:ImageSupplierID 
+XMP-plus\:ImageCreatorName=XMP-plus:ImageCreatorName +XMP-plus\:ImageCreatorID=XMP-plus:ImageCreatorID +XMP-plus\:ModelReleaseID=XMP-plus:ModelReleaseID +XMP-plus\:PropertyReleaseID=XMP-plus:PropertyReleaseID +XMP-plus\:CopyrightOwnerName=XMP-plus:CopyrightOwnerName +XMP-plus\:CopyrightOwnerID=XMP-plus:CopyrightOwnerID +XMP-plus\:LicensorName=XMP-plus:LicensorName +XMP-plus\:LicensorID=XMP-plus:LicensorID +XMP-plus\:LicensorTelephone1=XMP-plus:LicensorTelephone1 +XMP-plus\:LicensorTelephone2=XMP-plus:LicensorTelephone2 +XMP-plus\:LicensorEmail=XMP-plus:LicensorEmail +XMP-plus\:LicensorURL=XMP-plus:LicensorURL +XMP-custom\:Text=XMP-custom:Text +XMP-custom\:TextML=XMP-custom:TextML +ICC-header\:ProfileCMMType=ICC-header:ProfileCMMType +ICC-header\:ProfileVersion=ICC-header:ProfileVersion +ICC-header\:ProfileClass=ICC-header:ProfileClass +ICC-header\:ColorSpaceData=ICC-header:ColorSpaceData +ICC-header\:ProfileConnectionSpace=ICC-header:ProfileConnectionSpace +ICC-header\:ProfileDateTime=ICC-header:ProfileDateTime +ICC-header\:ProfileFileSignature=ICC-header:ProfileFileSignature +ICC-header\:PrimaryPlatform=ICC-header:PrimaryPlatform +ICC-header\:CMMFlags=ICC-header:CMMFlags +ICC-header\:DeviceManufacturer=ICC-header:DeviceManufacturer +ICC-header\:DeviceModel=ICC-header:DeviceModel +ICC-header\:DeviceAttributes=ICC-header:DeviceAttributes +ICC-header\:RenderingIntent=ICC-header:RenderingIntent +ICC-header\:ConnectionSpaceIlluminant=ICC-header:ConnectionSpaceIlluminant +ICC-header\:ProfileCreator=ICC-header:ProfileCreator +ICC-header\:ProfileID=ICC-header:ProfileID +ICC_Profile\:ProfileCopyright=ICC_Profile:ProfileCopyright +ICC_Profile\:ProfileDescription=ICC_Profile:ProfileDescription +ICC_Profile\:MediaWhitePoint=ICC_Profile:MediaWhitePoint +ICC_Profile\:MediaBlackPoint=ICC_Profile:MediaBlackPoint +ICC_Profile\:RedMatrixColumn=ICC_Profile:RedMatrixColumn +ICC_Profile\:GreenMatrixColumn=ICC_Profile:GreenMatrixColumn 
+ICC_Profile\:BlueMatrixColumn=ICC_Profile:BlueMatrixColumn +ICC_Profile\:DeviceMfgDesc=ICC_Profile:DeviceMfgDesc +ICC_Profile\:DeviceModelDesc=ICC_Profile:DeviceModelDesc +ICC_Profile\:ViewingCondDesc=ICC_Profile:ViewingCondDesc +ICC_Profile\:Luminance=ICC_Profile:Luminance +ICC_Profile\:Technology=ICC_Profile:Technology +ICC_Profile\:RedTRC=ICC_Profile:RedTRC +ICC_Profile\:GreenTRC=ICC_Profile:GreenTRC +ICC_Profile\:BlueTRC=ICC_Profile:BlueTRC +ICC-view\:ViewingCondIlluminant=ICC-view:ViewingCondIlluminant +ICC-view\:ViewingCondSurround=ICC-view:ViewingCondSurround +ICC-view\:ViewingCondIlluminantType=ICC-view:ViewingCondIlluminantType +ICC-meas\:MeasurementObserver=ICC-meas:MeasurementObserver +ICC-meas\:MeasurementBacking=ICC-meas:MeasurementBacking +ICC-meas\:MeasurementGeometry=ICC-meas:MeasurementGeometry +ICC-meas\:MeasurementFlare=ICC-meas:MeasurementFlare +ICC-meas\:MeasurementIlluminant=ICC-meas:MeasurementIlluminant +Adobe\:DCTEncodeVersion=Adobe:DCTEncodeVersion +Adobe\:APP14Flags0=Adobe:APP14Flags0 +Adobe\:APP14Flags1=Adobe:APP14Flags1 +Adobe\:ColorTransform=Adobe:ColorTransform +Composite\:Aperture=Composite:Aperture +Composite\:ImageSize=Composite:ImageSize +Composite\:Megapixels=Composite:Megapixels +Composite\:ShutterSpeed=Composite:ShutterSpeed +Composite\:SubSecModifyDate=Composite:SubSecModifyDate +Composite\:DateTimeCreated=Composite:DateTimeCreated +Composite\:FocalLength35efl=Composite:FocalLength35efl +Composite\:LightValue=Composite:LightValue +Composite\:LensID=Composite:LensID \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/parsers/external/config/exiftool-parser.xml b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/parsers/external/config/exiftool-parser.xml new file mode 100644 index 00000000..7d77927c --- /dev/null +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/parsers/external/config/exiftool-parser.xml @@ -0,0 +1,35 @@ + + + + + 
exiftool -ver + 126,127 + + env FOO=${OUTPUT} exiftool -args -G1 ${INPUT} + + image/x-raw-hasselblad + image/x-raw-sony + image/x-raw-canon + image/x-raw-adobe + image/gif + image/jp2 + image/jpeg + image/x-raw-kodak + image/x-raw-minolta + image/x-raw-nikon + image/x-raw-olympus + image/x-raw-pentax + image/png + image/x-raw-fuji + image/x-raw-panasonic + image/tiff + image/webp + + + + \s*([A-Za-z0-9/ \(\)]+\S{1})\s+:\s+([A-Za-z0-9\(\)\[\] \:\-\.]+)\s* + + ^-([\S]+)\=(.*) + + + diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json index 6206f6bc..22fd9531 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/resources/tika_engine_config.json @@ -526,6 +526,16 @@ "metadataOptions" ] }, + { + "transformerName": "IPTCMetadataExtractor", + "supportedSourceAndTargetList": [ + {"sourceMediaType": "image/jpeg", "priority": 60, "targetMediaType": "alfresco-metadata-extract"}, + {"sourceMediaType": "image/png", "priority": 60, "targetMediaType": "alfresco-metadata-extract"} + ], + "transformOptions": [ + "metadataOptions" + ] + }, { "transformerName": "MailMetadataExtractor", "supportedSourceAndTargetList": [ diff --git a/alfresco-transformer-base/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractMetadataExtractor.java b/alfresco-transformer-base/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractMetadataExtractor.java index 0c9e63d3..36cd4490 100644 --- a/alfresco-transformer-base/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractMetadataExtractor.java +++ b/alfresco-transformer-base/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractMetadataExtractor.java @@ -46,6 +46,7 @@ import java.util.Map; import java.util.Properties; import java.util.Set; import 
java.util.StringTokenizer; +import java.util.TreeMap; /** * Helper methods for metadata extract and embed. @@ -577,7 +578,7 @@ public abstract class AbstractMetadataExtractor systemProperties.put(systemQName, documentValue); } } - return systemProperties; + return new TreeMap(systemProperties); } private void writeMetadata(File targetFile, Map results)