ATS-899 create IPTCMetadataExtractor within Tika T-engine (#372)

This commit is contained in:
David Edwards 2021-04-21 15:04:51 +01:00 committed by GitHub
parent 8b2450e822
commit 5ce59d9c04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 718 additions and 16 deletions

View File

@ -245,19 +245,21 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testExtractMetadataRFC822() throws Exception public void testExtractMetadataRFC822() throws Exception
{ {
String expected = String expected =
"{\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," + "{"+
"\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\"," + "\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop <nevin.nollop@gmail.com>\","+
"\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop <nevinn@alfresco.com>\"," + "\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop <nevinn@alfresco.com>\","+
"\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000," + "\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\","+
"\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," + "\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop <nevin.nollop@alfresco.com>\","+
"\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\"," + "\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000,"+
"\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"," + "\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\","+
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," + "\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\","+
"\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop <nevinn@alfresco.com>\"," + "\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000,"+
"\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000," + "\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop <nevinn@alfresco.com>\","+
"\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\"," + "\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop <nevin.nollop@alfresco.com>\","+
"\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," + "\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\","+
"\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"}"; "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\","+
"\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop <nevin.nollop@gmail.com>\""+
"}";
MvcResult result = sendRequest("eml", MvcResult result = sendRequest("eml",
null, null,
MIMETYPE_RFC822, MIMETYPE_RFC822,
@ -287,9 +289,9 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"\"messageFrom\":[" + "\"messageFrom\":[" +
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n"; "\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n";
String expected = String expected =
"{\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," + "{\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\","+
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," + "\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
"\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"}"; "\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"}";
MvcResult result = sendRequest("eml", MvcResult result = sendRequest("eml",
null, null,
MIMETYPE_RFC822, MIMETYPE_RFC822,

View File

@ -38,6 +38,7 @@ import org.alfresco.transformer.metadataExtractors.PdfBoxMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.PoiMetadataExtractor; import org.alfresco.transformer.metadataExtractors.PoiMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.TikaAudioMetadataExtractor; import org.alfresco.transformer.metadataExtractors.TikaAudioMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.TikaAutoMetadataExtractor; import org.alfresco.transformer.metadataExtractors.TikaAutoMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.IPTCMetadataExtractor;
import org.alfresco.transformer.util.RequestParamMap; import org.alfresco.transformer.util.RequestParamMap;
import org.apache.tika.exception.TikaException; import org.apache.tika.exception.TikaException;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -80,6 +81,7 @@ public class TikaJavaExecutor implements JavaExecutor
.put("PoiMetadataExtractor", new PoiMetadataExtractor()) .put("PoiMetadataExtractor", new PoiMetadataExtractor())
.put("TikaAudioMetadataExtractor", new TikaAudioMetadataExtractor()) .put("TikaAudioMetadataExtractor", new TikaAudioMetadataExtractor())
.put("TikaAutoMetadataExtractor", new TikaAutoMetadataExtractor()) .put("TikaAutoMetadataExtractor", new TikaAutoMetadataExtractor())
.put("IPTCMetadataExtractor", new IPTCMetadataExtractor())
.build(); .build();
private final Map<String, AbstractTikaMetadataExtractor> metadataEmbedder = ImmutableMap private final Map<String, AbstractTikaMetadataExtractor> metadataEmbedder = ImmutableMap
.<String, AbstractTikaMetadataExtractor>builder() .<String, AbstractTikaMetadataExtractor>builder()

View File

@ -0,0 +1,58 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import java.io.IOException;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.tika.parsers.ExifToolParser;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class);
public IPTCMetadataExtractor()
{
super(logger);
}
@Override
protected Parser getParser() {
try {
return new ExifToolParser();
} catch (IOException | TikaException e) {
logger.error(e.getMessage(), e);
throw new TransformException(500, "Error creating IPTC parser");
}
}
}

View File

@ -0,0 +1,286 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.io.NullOutputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.external.ExternalParsersFactory;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import static java.nio.charset.StandardCharsets.UTF_8;
public class ExifToolParser extends ExternalParser {
private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml";
public ExifToolParser() throws IOException, TikaException {
super();
ExternalParser eParser = ExternalParsersFactory.create(getExternalParserConfigURL()).get(0);
this.setCommand(eParser.getCommand());
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
this.setSupportedTypes(eParser.getSupportedTypes());
}
private URL getExternalParserConfigURL(){
ClassLoader classLoader = ExifToolParser.class.getClassLoader();
return classLoader.getResource(EXIFTOOL_PARSER_CONFIG);
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}
* due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation. <p>
* Executes the configured external command and passes the given document
* stream as a simple XHTML document to the given SAX content handler.
* Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
* has been called to set patterns.
*/
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
TemporaryResources tmp = new TemporaryResources();
try {
parse(TikaInputStream.get(stream, tmp), xhtml, metadata, tmp);
} finally {
tmp.dispose();
}
}
private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata metadata, TemporaryResources tmp)
throws IOException, SAXException, TikaException {
boolean inputToStdIn = true;
boolean outputFromStdOut = true;
boolean hasPatterns = (getMetadataExtractionPatterns() != null && !getMetadataExtractionPatterns().isEmpty());
File output = null;
// Build our getCommand()
String[] cmd;
if (getCommand().length == 1) {
cmd = getCommand()[0].split(" ");
} else {
cmd = new String[getCommand().length];
System.arraycopy(getCommand(), 0, cmd, 0, getCommand().length);
}
for (int i = 0; i < cmd.length; i++) {
if (cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
inputToStdIn = false;
}
if (cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
output = tmp.createTemporaryFile();
outputFromStdOut = false;
cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
}
}
// Execute
Process process = null;
try {
if (cmd.length == 1) {
process = Runtime.getRuntime().exec(cmd[0]);
} else {
process = Runtime.getRuntime().exec(cmd);
}
} catch (Exception e) {
e.printStackTrace();
}
try {
if (inputToStdIn) {
sendInput(process, stream);
} else {
process.getOutputStream().close();
}
InputStream out = process.getInputStream();
InputStream err = process.getErrorStream();
if (hasPatterns) {
if (outputFromStdOut) {
extractOutput(out, xhtml);
} else {
extractMetadata(out, metadata);
}
} else {
ignoreStream(err);
if (outputFromStdOut) {
extractOutput(out, xhtml);
} else {
ignoreStream(out);
}
}
} finally {
try {
process.waitFor();
} catch (InterruptedException ignore) {
}
}
// Grab the output if we haven't already
if (!outputFromStdOut) {
extractOutput(new FileInputStream(output), xhtml);
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that extracts the contents of the standard output
* stream of the given process to the given XHTML content handler.
* The standard output stream is closed once fully processed.
*
* @param process process
* @param xhtml XHTML content handler
* @throws SAXException if the XHTML SAX events could not be handled
* @throws IOException if an input error occurred
*/
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
try (Reader reader = new InputStreamReader(stream, UTF_8)) {
xhtml.startDocument();
xhtml.startElement("p");
char[] buffer = new char[1024];
for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
xhtml.characters(buffer, 0, n);
}
xhtml.endElement("p");
xhtml.endDocument();
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that sends the contents of the given input stream
* to the standard input stream of the given process. Potential
* exceptions are ignored, and the standard input stream is closed
* once fully processed. Note that the given input stream is <em>not</em>
* closed by this method.
*
* @param process process
* @param stream input stream
*/
private void sendInput(final Process process, final InputStream stream) {
Thread t = new Thread() {
public void run() {
OutputStream stdin = process.getOutputStream();
try {
IOUtils.copy(stream, stdin);
} catch (IOException e) {
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that reads and discards the contents of the
* standard stream of the given process. Potential exceptions
* are ignored, and the stream is closed once fully processed.
*
* @param process process
*/
private void ignoreStream(final InputStream stream) {
Thread t = new Thread() {
public void run() {
try {
IOUtils.copy(stream, new NullOutputStream());
} catch (IOException e) {
} finally {
IOUtils.closeQuietly(stream);
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
private void extractMetadata(final InputStream stream, final Metadata metadata) {
Thread t = new Thread() {
public void run() {
BufferedReader reader;
reader = new BufferedReader(new InputStreamReader(stream, UTF_8));
try {
String line;
while ((line = reader.readLine()) != null) {
for (Pattern p : getMetadataExtractionPatterns().keySet()) {
Matcher m = p.matcher(line);
if (m.find()) {
if (getMetadataExtractionPatterns().get(p) != null
&& !getMetadataExtractionPatterns().get(p).equals("")) {
metadata.add(getMetadataExtractionPatterns().get(p), m.group(1));
} else {
metadata.add(m.group(1), m.group(2));
}
}
}
}
} catch (IOException e) {
// Ignore
} finally {
IOUtils.closeQuietly(reader);
IOUtils.closeQuietly(stream);
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
}

View File

@ -0,0 +1,308 @@
#
# IPTCMetadataExtractor - default mapping
#
# author: David Edwards
# TODO Complete mappings (currently copied from other files)
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
namespace.prefix.video=http://www.alfresco.org/model/video/1.0
namespace.prefix.custom=MyCustomNameSpace
# Dump of most namespaces returned by the exifTool for testing
# TODO ATS-900
namespace.prefix.File=File
namespace.prefix.IFD0=IFD0
namespace.prefix.ExifIFD=ExifIFD
namespace.prefix.IFD1=IFD1
namespace.prefix.Photoshop=Photoshop
namespace.prefix.IPTC=IPTC
namespace.prefix.ICC-header=ICC-header
namespace.prefix.ICC_Profile=ICC_Profile
namespace.prefix.ICC-view=ICC-view
namespace.prefix.ICC-meas=ICC-meas
namespace.prefix.XMP-x=XMP-x
namespace.prefix.XMP-photoshop=XMP-photoshop
namespace.prefix.XMP-xmp=XMP-xmp
namespace.prefix.XMP-iptcCore=XMP-iptcCore
namespace.prefix.XMP-dc=XMP-dc
namespace.prefix.XMP-xmpRights=XMP-xmpRights
namespace.prefix.XMP-xmpMM=XMP-xmpMM
namespace.prefix.XMP-illustrator=XMP-illustrator
namespace.prefix.XMP-mediapro=XMP-mediapro
namespace.prefix.XMP-aux=XMP-aux
namespace.prefix.XMP-iptcExt=XMP-iptcExt
namespace.prefix.XMP-plus=XMP-plus
namespace.prefix.XMP-custom=XMP-custom
namespace.prefix.Adobe=Adobe
namespace.prefix.Composite=Composite
# Core mappings
# There is overlap with the TikaAutoMetadataExtractor. These core mappings should still be included for backwards compatibility.
# NOTE(review): XMP-dc\:Creator, XMP-dc\:Title and XMP-dc\:Description are defined a second time in the
# exifTool dump section further down this file. If this file is loaded with java.util.Properties, the
# later definition replaces the cm:* targets given here - confirm which mapping is intended.
# TODO ATS-900
XMP-dc\:Creator=cm:author
XMP-dc\:Title=cm:title
XMP-dc\:Description=cm:description
created=cm:created
# Dump of most namespaces returned by the exifTool for testing
# TODO ATS-900
ExifTool\:ExifToolVersion=custom:ExifToolVersion
File\:FileType=File:FileType
File\:FileTypeExtension=File:FileTypeExtension
File\:MIMEType=File:MIMEType
File\:ExifByteOrder=File:ExifByteOrder
File\:CurrentIPTCDigest=File:CurrentIPTCDigest
File\:ImageWidth=File:ImageWidth
File\:ImageHeight=File:ImageHeight
File\:EncodingProcess=File:EncodingProcess
File\:BitsPerSample=File:BitsPerSample
File\:ColorComponents=File:ColorComponents
File\:YCbCrSubSampling=File:YCbCrSubSampling
IFD0\:PhotometricInterpretation=IFD0:PhotometricInterpretation
IFD0\:ImageDescription=IFD0:ImageDescription
IFD0\:Orientation=IFD0:Orientation
IFD0\:SamplesPerPixel=IFD0:SamplesPerPixel
IFD0\:XResolution=IFD0:XResolution
IFD0\:YResolution=IFD0:YResolution
IFD0\:ResolutionUnit=IFD0:ResolutionUnit
IFD0\:Software=IFD0:Software
IFD0\:ModifyDate=IFD0:ModifyDate
IFD0\:Artist=IFD0:Artist
IFD0\:Copyright=IFD0:Copyright
ExifIFD\:Make=ExifIFD:Make
ExifIFD\:Model=ExifIFD:Model
ExifIFD\:ExposureTime=ExifIFD:ExposureTime
ExifIFD\:FNumber=ExifIFD:FNumber
ExifIFD\:ExposureProgram=ExifIFD:ExposureProgram
ExifIFD\:ISO=ExifIFD:ISO
ExifIFD\:ExifVersion=ExifIFD:ExifVersion
ExifIFD\:DateTimeOriginal=ExifIFD:DateTimeOriginal
ExifIFD\:CreateDate=ExifIFD:CreateDate
ExifIFD\:ComponentsConfiguration=ExifIFD:ComponentsConfiguration
ExifIFD\:ShutterSpeedValue=ExifIFD:ShutterSpeedValue
ExifIFD\:ApertureValue=ExifIFD:ApertureValue
ExifIFD\:ExposureCompensation=ExifIFD:ExposureCompensation
ExifIFD\:MaxApertureValue=ExifIFD:MaxApertureValue
ExifIFD\:MeteringMode=ExifIFD:MeteringMode
ExifIFD\:Flash=ExifIFD:Flash
ExifIFD\:FocalLength=ExifIFD:FocalLength
ExifIFD\:SubSecTime=ExifIFD:SubSecTime
ExifIFD\:SubSecTimeOriginal=ExifIFD:SubSecTimeOriginal
ExifIFD\:SubSecTimeDigitized=ExifIFD:SubSecTimeDigitized
ExifIFD\:FlashpixVersion=ExifIFD:FlashpixVersion
ExifIFD\:ColorSpace=ExifIFD:ColorSpace
ExifIFD\:ExifImageWidth=ExifIFD:ExifImageWidth
ExifIFD\:ExifImageHeight=ExifIFD:ExifImageHeight
ExifIFD\:FocalPlaneXResolution=ExifIFD:FocalPlaneXResolution
ExifIFD\:FocalPlaneYResolution=ExifIFD:FocalPlaneYResolution
ExifIFD\:FocalPlaneResolutionUnit=ExifIFD:FocalPlaneResolutionUnit
ExifIFD\:CustomRendered=ExifIFD:CustomRendered
ExifIFD\:ExposureMode=ExifIFD:ExposureMode
ExifIFD\:WhiteBalance=ExifIFD:WhiteBalance
ExifIFD\:SceneCaptureType=ExifIFD:SceneCaptureType
ExifIFD\:SerialNumber=ExifIFD:SerialNumber
ExifIFD\:LensInfo=ExifIFD:LensInfo
ExifIFD\:LensModel=ExifIFD:LensModel
ExifIFD\:LensSerialNumber=ExifIFD:LensSerialNumber
IFD1\:Compression=IFD1:Compression
IFD1\:ThumbnailOffset=IFD1:ThumbnailOffset
IFD1\:ThumbnailLength=IFD1:ThumbnailLength
IFD1\:ThumbnailImage=IFD1:ThumbnailImage
IPTC\:CodedCharacterSet=IPTC:CodedCharacterSet
IPTC\:ApplicationRecordVersion=IPTC:ApplicationRecordVersion
IPTC\:Caption-Abstract=IPTC:Caption-Abstract
IPTC\:Writer-Editor=IPTC:Writer-Editor
IPTC\:SpecialInstructions=IPTC:SpecialInstructions
IPTC\:By-line=IPTC:By-line
IPTC\:By-lineTitle=IPTC:By-lineTitle
IPTC\:ObjectName=IPTC:ObjectName
IPTC\:TimeCreated=IPTC:TimeCreated
IPTC\:Sub-location=IPTC:Sub-location
IPTC\:Province-State=IPTC:Province-State
IPTC\:Country-PrimaryLocationName=IPTC:Country-PrimaryLocationName
IPTC\:Country-PrimaryLocationCode=IPTC:Country-PrimaryLocationCode
IPTC\:OriginalTransmissionReference=IPTC:OriginalTransmissionReference
IPTC\:Keywords=IPTC:Keywords
IPTC\:CopyrightNotice=IPTC:CopyrightNotice
IPTC\:FixtureIdentifier=IPTC:FixtureIdentifier
IPTC\:EditStatus=IPTC:EditStatus
IPTC\:Contact=IPTC:Contact
Photoshop\:IPTCDigest=Photoshop:IPTCDigest
Photoshop\:DisplayedUnitsX=Photoshop:DisplayedUnitsX
Photoshop\:DisplayedUnitsY=Photoshop:DisplayedUnitsY
Photoshop\:PrintStyle=Photoshop:PrintStyle
Photoshop\:PrintPosition=Photoshop:PrintPosition
Photoshop\:PrintScale=Photoshop:PrintScale
Photoshop\:GlobalAngle=Photoshop:GlobalAngle
Photoshop\:GlobalAltitude=Photoshop:GlobalAltitude
Photoshop\:CopyrightFlag=Photoshop:CopyrightFlag
Photoshop\:URL=Photoshop:URL
Photoshop\:URL_List=Photoshop:URL_List
Photoshop\:SlicesGroupName=Photoshop:SlicesGroupName
Photoshop\:NumSlices=Photoshop:NumSlices
Photoshop\:PixelAspectRatio=Photoshop:PixelAspectRatio
Photoshop\:PhotoshopThumbnail=Photoshop:PhotoshopThumbnail
Photoshop\:HasRealMergedData=Photoshop:HasRealMergedData
Photoshop\:WriterName=Photoshop:WriterName
Photoshop\:ReaderName=Photoshop:ReaderName
Photoshop\:PhotoshopQuality=Photoshop:PhotoshopQuality
Photoshop\:PhotoshopFormat=Photoshop:PhotoshopFormat
Photoshop\:ProgressiveScans=Photoshop:ProgressiveScans
XMP-x\:XMPToolkit=XMP-x:XMPToolkit
XMP-photoshop\:LegacyIPTCDigest=XMP-photoshop:LegacyIPTCDigest
XMP-photoshop\:AuthorsPosition=XMP-photoshop:AuthorsPosition
XMP-photoshop\:Headline=XMP-photoshop:Headline
XMP-photoshop\:CaptionWriter=XMP-photoshop:CaptionWriter
XMP-photoshop\:DateCreated=XMP-photoshop:DateCreated
XMP-photoshop\:City=XMP-photoshop:City
XMP-photoshop\:State=XMP-photoshop:State
XMP-photoshop\:Country=XMP-photoshop:Country
XMP-photoshop\:TransmissionReference=XMP-photoshop:TransmissionReference
XMP-photoshop\:Instructions=XMP-photoshop:Instructions
XMP-photoshop\:Credit=XMP-photoshop:Credit
XMP-photoshop\:Source=XMP-photoshop:Source
XMP-photoshop\:Category=XMP-photoshop:Category
XMP-photoshop\:ColorMode=XMP-photoshop:ColorMode
XMP-photoshop\:ICCProfileName=XMP-photoshop:ICCProfileName
XMP-photoshop\:SupplementalCategories=XMP-photoshop:SupplementalCategories
XMP-xmp\:CreatorTool=XMP-xmp:CreatorTool
XMP-xmp\:MetadataDate=XMP-xmp:MetadataDate
XMP-iptcCore\:IntellectualGenre=XMP-iptcCore:IntellectualGenre
XMP-iptcCore\:Location=XMP-iptcCore:Location
XMP-iptcCore\:CountryCode=XMP-iptcCore:CountryCode
XMP-iptcCore\:CreatorAddress=XMP-iptcCore:CreatorAddress
XMP-iptcCore\:CreatorCity=XMP-iptcCore:CreatorCity
XMP-iptcCore\:CreatorRegion=XMP-iptcCore:CreatorRegion
XMP-iptcCore\:CreatorPostalCode=XMP-iptcCore:CreatorPostalCode
XMP-iptcCore\:CreatorCountry=XMP-iptcCore:CreatorCountry
XMP-iptcCore\:CreatorWorkTelephone=XMP-iptcCore:CreatorWorkTelephone
XMP-iptcCore\:CreatorWorkEmail=XMP-iptcCore:CreatorWorkEmail
XMP-iptcCore\:CreatorWorkURL=XMP-iptcCore:CreatorWorkURL
XMP-iptcCore\:Scene=XMP-iptcCore:Scene
XMP-iptcCore\:SubjectCode=XMP-iptcCore:SubjectCode
XMP-dc\:Format=XMP-dc:Format
XMP-dc\:Title=XMP-dc:Title
XMP-dc\:Subject=XMP-dc:Subject
XMP-dc\:Creator=XMP-dc:Creator
XMP-dc\:Rights=XMP-dc:Rights
XMP-dc\:Description=XMP-dc:Description
XMP-xmpRights\:WebStatement=XMP-xmpRights:WebStatement
XMP-xmpRights\:Marked=XMP-xmpRights:Marked
XMP-xmpRights\:UsageTerms=XMP-xmpRights:UsageTerms
XMP-xmpMM\:DocumentID=XMP-xmpMM:DocumentID
XMP-xmpMM\:InstanceID=XMP-xmpMM:InstanceID
XMP-xmpMM\:OriginalDocumentID=XMP-xmpMM:OriginalDocumentID
XMP-xmpMM\:HistoryAction=XMP-xmpMM:HistoryAction
XMP-xmpMM\:HistoryInstanceID=XMP-xmpMM:HistoryInstanceID
XMP-xmpMM\:HistoryWhen=XMP-xmpMM:HistoryWhen
XMP-xmpMM\:HistorySoftwareAgent=XMP-xmpMM:HistorySoftwareAgent
XMP-xmpMM\:HistoryChanged=XMP-xmpMM:HistoryChanged
XMP-illustrator\:StartupProfile=XMP-illustrator:StartupProfile
XMP-mediapro\:Status=XMP-mediapro:Status
XMP-mediapro\:People=XMP-mediapro:People
XMP-aux\:Lens=XMP-aux:Lens
XMP-iptcExt\:AdditionalModelInformation=XMP-iptcExt:AdditionalModelInformation
XMP-iptcExt\:MaxAvailWidth=XMP-iptcExt:MaxAvailWidth
XMP-iptcExt\:MaxAvailHeight=XMP-iptcExt:MaxAvailHeight
XMP-iptcExt\:DigitalSourceType=XMP-iptcExt:DigitalSourceType
XMP-iptcExt\:LocationCreatedSublocation=XMP-iptcExt:LocationCreatedSublocation
XMP-iptcExt\:LocationCreatedCity=XMP-iptcExt:LocationCreatedCity
XMP-iptcExt\:LocationCreatedProvinceState=XMP-iptcExt:LocationCreatedProvinceState
XMP-iptcExt\:LocationCreatedCountryName=XMP-iptcExt:LocationCreatedCountryName
XMP-iptcExt\:LocationCreatedCountryCode=XMP-iptcExt:LocationCreatedCountryCode
XMP-iptcExt\:LocationCreatedWorldRegion=XMP-iptcExt:LocationCreatedWorldRegion
XMP-iptcExt\:Event=XMP-iptcExt:Event
XMP-iptcExt\:ModelAge=XMP-iptcExt:ModelAge
XMP-iptcExt\:ArtworkDateCreated=XMP-iptcExt:ArtworkDateCreated
XMP-iptcExt\:ArtworkSource=XMP-iptcExt:ArtworkSource
XMP-iptcExt\:ArtworkSourceInventoryNo=XMP-iptcExt:ArtworkSourceInventoryNo
XMP-iptcExt\:ArtworkCopyrightNotice=XMP-iptcExt:ArtworkCopyrightNotice
XMP-iptcExt\:ArtworkTitle=XMP-iptcExt:ArtworkTitle
XMP-iptcExt\:ArtworkCreator=XMP-iptcExt:ArtworkCreator
XMP-iptcExt\:RegistryOrganisationID=XMP-iptcExt:RegistryOrganisationID
XMP-iptcExt\:RegistryItemID=XMP-iptcExt:RegistryItemID
XMP-iptcExt\:OrganisationInImageName=XMP-iptcExt:OrganisationInImageName
XMP-iptcExt\:OrganisationInImageCode=XMP-iptcExt:OrganisationInImageCode
XMP-iptcExt\:PersonInImage=XMP-iptcExt:PersonInImage
XMP-iptcExt\:LocationShownSublocation=XMP-iptcExt:LocationShownSublocation
XMP-iptcExt\:LocationShownCity=XMP-iptcExt:LocationShownCity
XMP-iptcExt\:LocationShownProvinceState=XMP-iptcExt:LocationShownProvinceState
XMP-iptcExt\:LocationShownCountryName=XMP-iptcExt:LocationShownCountryName
XMP-iptcExt\:LocationShownCountryCode=XMP-iptcExt:LocationShownCountryCode
XMP-iptcExt\:LocationShownWorldRegion=XMP-iptcExt:LocationShownWorldRegion
XMP-plus\:PropertyReleaseStatus=XMP-plus:PropertyReleaseStatus
XMP-plus\:ImageSupplierImageID=XMP-plus:ImageSupplierImageID
XMP-plus\:MinorModelAgeDisclosure=XMP-plus:MinorModelAgeDisclosure
XMP-plus\:ModelReleaseStatus=XMP-plus:ModelReleaseStatus
XMP-plus\:PLUSVersion=XMP-plus:PLUSVersion
XMP-plus\:ImageSupplierName=XMP-plus:ImageSupplierName
XMP-plus\:ImageSupplierID=XMP-plus:ImageSupplierID
XMP-plus\:ImageCreatorName=XMP-plus:ImageCreatorName
XMP-plus\:ImageCreatorID=XMP-plus:ImageCreatorID
XMP-plus\:ModelReleaseID=XMP-plus:ModelReleaseID
XMP-plus\:PropertyReleaseID=XMP-plus:PropertyReleaseID
XMP-plus\:CopyrightOwnerName=XMP-plus:CopyrightOwnerName
XMP-plus\:CopyrightOwnerID=XMP-plus:CopyrightOwnerID
XMP-plus\:LicensorName=XMP-plus:LicensorName
XMP-plus\:LicensorID=XMP-plus:LicensorID
XMP-plus\:LicensorTelephone1=XMP-plus:LicensorTelephone1
XMP-plus\:LicensorTelephone2=XMP-plus:LicensorTelephone2
XMP-plus\:LicensorEmail=XMP-plus:LicensorEmail
XMP-plus\:LicensorURL=XMP-plus:LicensorURL
XMP-custom\:Text=XMP-custom:Text
XMP-custom\:TextML=XMP-custom:TextML
ICC-header\:ProfileCMMType=ICC-header:ProfileCMMType
ICC-header\:ProfileVersion=ICC-header:ProfileVersion
ICC-header\:ProfileClass=ICC-header:ProfileClass
ICC-header\:ColorSpaceData=ICC-header:ColorSpaceData
ICC-header\:ProfileConnectionSpace=ICC-header:ProfileConnectionSpace
ICC-header\:ProfileDateTime=ICC-header:ProfileDateTime
ICC-header\:ProfileFileSignature=ICC-header:ProfileFileSignature
ICC-header\:PrimaryPlatform=ICC-header:PrimaryPlatform
ICC-header\:CMMFlags=ICC-header:CMMFlags
ICC-header\:DeviceManufacturer=ICC-header:DeviceManufacturer
ICC-header\:DeviceModel=ICC-header:DeviceModel
ICC-header\:DeviceAttributes=ICC-header:DeviceAttributes
ICC-header\:RenderingIntent=ICC-header:RenderingIntent
ICC-header\:ConnectionSpaceIlluminant=ICC-header:ConnectionSpaceIlluminant
ICC-header\:ProfileCreator=ICC-header:ProfileCreator
ICC-header\:ProfileID=ICC-header:ProfileID
ICC_Profile\:ProfileCopyright=ICC_Profile:ProfileCopyright
ICC_Profile\:ProfileDescription=ICC_Profile:ProfileDescription
ICC_Profile\:MediaWhitePoint=ICC_Profile:MediaWhitePoint
ICC_Profile\:MediaBlackPoint=ICC_Profile:MediaBlackPoint
ICC_Profile\:RedMatrixColumn=ICC_Profile:RedMatrixColumn
ICC_Profile\:GreenMatrixColumn=ICC_Profile:GreenMatrixColumn
ICC_Profile\:BlueMatrixColumn=ICC_Profile:BlueMatrixColumn
ICC_Profile\:DeviceMfgDesc=ICC_Profile:DeviceMfgDesc
ICC_Profile\:DeviceModelDesc=ICC_Profile:DeviceModelDesc
ICC_Profile\:ViewingCondDesc=ICC_Profile:ViewingCondDesc
ICC_Profile\:Luminance=ICC_Profile:Luminance
ICC_Profile\:Technology=ICC_Profile:Technology
ICC_Profile\:RedTRC=ICC_Profile:RedTRC
ICC_Profile\:GreenTRC=ICC_Profile:GreenTRC
ICC_Profile\:BlueTRC=ICC_Profile:BlueTRC
ICC-view\:ViewingCondIlluminant=ICC-view:ViewingCondIlluminant
ICC-view\:ViewingCondSurround=ICC-view:ViewingCondSurround
ICC-view\:ViewingCondIlluminantType=ICC-view:ViewingCondIlluminantType
ICC-meas\:MeasurementObserver=ICC-meas:MeasurementObserver
ICC-meas\:MeasurementBacking=ICC-meas:MeasurementBacking
ICC-meas\:MeasurementGeometry=ICC-meas:MeasurementGeometry
ICC-meas\:MeasurementFlare=ICC-meas:MeasurementFlare
ICC-meas\:MeasurementIlluminant=ICC-meas:MeasurementIlluminant
Adobe\:DCTEncodeVersion=Adobe:DCTEncodeVersion
Adobe\:APP14Flags0=Adobe:APP14Flags0
Adobe\:APP14Flags1=Adobe:APP14Flags1
Adobe\:ColorTransform=Adobe:ColorTransform
Composite\:Aperture=Composite:Aperture
Composite\:ImageSize=Composite:ImageSize
Composite\:Megapixels=Composite:Megapixels
Composite\:ShutterSpeed=Composite:ShutterSpeed
Composite\:SubSecModifyDate=Composite:SubSecModifyDate
Composite\:DateTimeCreated=Composite:DateTimeCreated
Composite\:FocalLength35efl=Composite:FocalLength35efl
Composite\:LightValue=Composite:LightValue
Composite\:LensID=Composite:LensID

View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<external-parsers>
<parser>
<!-- NOTE(review): presumably Tika runs this check to decide whether exiftool is available, treating the listed exit codes as failure; confirm against the Tika external parser config reader. -->
<check>
<command>exiftool -ver</command>
<error-codes>126,127</error-codes>
</check>
<!-- ExifToolParser substitutes the input and output file tokens in this command before running it.
Because an output token is present, ExifToolParser does not treat the stdout of the process as document text:
it applies the metadata patterns below to stdout and then reads the temporary output file as the document text.
NOTE(review): FOO looks like a dummy variable whose only purpose is to carry the output token; confirm. -->
<command>env FOO=${OUTPUT} exiftool -args -G1 ${INPUT}</command>
<!-- Source mime types this parser is declared to support -->
<mime-types>
<mime-type>image/x-raw-hasselblad</mime-type>
<mime-type>image/x-raw-sony</mime-type>
<mime-type>image/x-raw-canon</mime-type>
<mime-type>image/x-raw-adobe</mime-type>
<mime-type>image/gif</mime-type>
<mime-type>image/jp2</mime-type>
<mime-type>image/jpeg</mime-type>
<mime-type>image/x-raw-kodak</mime-type>
<mime-type>image/x-raw-minolta</mime-type>
<mime-type>image/x-raw-nikon</mime-type>
<mime-type>image/x-raw-olympus</mime-type>
<mime-type>image/x-raw-pentax</mime-type>
<mime-type>image/png</mime-type>
<mime-type>image/x-raw-fuji</mime-type>
<mime-type>image/x-raw-panasonic</mime-type>
<mime-type>image/tiff</mime-type>
<mime-type>image/webp</mime-type>
</mime-types>
<!-- Patterns applied to each output line. No target name is configured for them, so presumably
ExifToolParser uses capture group 1 as the metadata name and capture group 2 as its value.
TODO confirm how the config reader maps match elements that have no key. -->
<metadata>
<!-- Default output-->
<match>\s*([A-Za-z0-9/ \(\)]+\S{1})\s+:\s+([A-Za-z0-9\(\)\[\] \:\-\.]+)\s*</match>
<!-- args format-->
<match>^-([\S]+)\=(.*)</match>
</metadata>
</parser>
</external-parsers>

View File

@ -526,6 +526,16 @@
"metadataOptions" "metadataOptions"
] ]
}, },
{
"transformerName": "IPTCMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "image/jpeg", "priority": 60, "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/png", "priority": 60, "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
"metadataOptions"
]
},
{ {
"transformerName": "MailMetadataExtractor", "transformerName": "MailMetadataExtractor",
"supportedSourceAndTargetList": [ "supportedSourceAndTargetList": [

View File

@ -46,6 +46,7 @@ import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import java.util.TreeMap;
/** /**
* Helper methods for metadata extract and embed. * Helper methods for metadata extract and embed.
@ -577,7 +578,7 @@ public abstract class AbstractMetadataExtractor
systemProperties.put(systemQName, documentValue); systemProperties.put(systemQName, documentValue);
} }
} }
return systemProperties; return new TreeMap<String, Serializable>(systemProperties);
} }
private void writeMetadata(File targetFile, Map<String, Serializable> results) private void writeMetadata(File targetFile, Map<String, Serializable> results)