ATS-892 Convert ExifTool separated strings into collections for ACS consumption (#397)

ATS-911 Add regex pattern matching for date replacement
This commit is contained in:
David Edwards 2021-05-06 08:58:42 +01:00 committed by GitHub
parent d25e3c365a
commit e11cbd5180
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 447 additions and 27 deletions

View File

@ -100,8 +100,10 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
return Stream.of(
//IPTCMetadataExtractor
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-EXT.jpg"),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-multi-creator.jpg"),
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "testJPEG_IPTC_EXT.jpg"),
testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
testFile(MIMETYPE_IMAGE_RAW_RAF, "raf", "quick.raf"),

View File

@ -9,7 +9,7 @@
"{http://purl.org/dc/elements/1.1/}description" : "Gym class featuring a brown fox and lazy dog",
"{http://purl.org/dc/elements/1.1/}creator" : "Nevin Nollop",
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
"{http://purl.org/dc/elements/1.1/}subject" : "Pangram, fox, dog",
"{http://purl.org/dc/elements/1.1/}subject" : [ "Pangram", "fox", "dog" ],
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "50.0",
"{http://www.alfresco.org/model/exif/1.0}xResolution" : "50.0"

View File

@ -7,7 +7,7 @@
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "RGAUSS",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "United Kingdom",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "1885:03:14",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "1885-03-14",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "The Gym",
"{http://purl.org/dc/elements/1.1/}description" : "Gym class featuring a brown fox and lazy dog",
"{http://purl.org/dc/elements/1.1/}creator" : "Nevin Nollop",
@ -21,7 +21,7 @@
"{http://www.alfresco.org/model/exif/1.0}software" : "Adobe Photoshop CC (Macintosh)",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.2.0",
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
"{http://purl.org/dc/elements/1.1/}subject" : "fox, dog, lazy, jumping",
"{http://purl.org/dc/elements/1.1/}subject" : [ "fox", "dog", "lazy", "jumping" ],
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "1.0",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The Dog",

View File

@ -9,7 +9,7 @@
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "The Gym",
"{http://purl.org/dc/elements/1.1/}description" : "Gym class featuring a brown fox and lazy dog",
"{http://purl.org/dc/elements/1.1/}creator" : "John Smith, Jane Doe",
"{http://purl.org/dc/elements/1.1/}creator" : [ "John Smith", "Jane Doe" ],
"{http://www.alfresco.org/model/exif/1.0}xResolution" : "1.0",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Derek Hulley",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Maidenhead",
@ -20,7 +20,7 @@
"{http://www.alfresco.org/model/exif/1.0}software" : "Adobe Photoshop CC (Macintosh)",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.2.0",
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
"{http://purl.org/dc/elements/1.1/}subject" : "fox, dog, lazy, jumping",
"{http://purl.org/dc/elements/1.1/}subject" : [ "fox", "dog", "lazy", "jumping" ],
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "1.0",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The Dog",

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

View File

@ -0,0 +1,166 @@
{
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity": "Atlanta",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr": "1234 Some Road",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode": "30339",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion": "GA",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork": "info@alfresco.com.other@example.com",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork": "555-1234.555-4321",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork": "http://alfresco.com.http://example.com",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode": "US",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre": "intellectual genre",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location": "Rock Creek Park",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene": [
"iptc scene 1",
"iptc scene 2"
],
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode": [
"iptc subject code 1",
"iptc subject code 2"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice": "Ray Gauss II",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator": [
"Mother Nature",
"Man",
"Mother Nature"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated": [
"1890-01-01",
"1901-02-01"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource": "National Park Service",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo": [
"123456",
"654321"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle": [
"Rock Creek Stream Bank",
"Pollution",
"Some Tree"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo": "rocky 1 and rocky 2 are big",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType": "http://cv.iptc.org/newscodes/digitalsourcetype/digitalCapture",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event": "Photo Bike Tour",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity": "Washington",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode": "US",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName": "United States",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState": "D.C.",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation": "Rock Creek Park",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion": "North America",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity": "Washington",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode": "US",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName": "United States",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState": "D.C.",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation": [
"Rock Creek Park Sub",
"Stream Section"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion": "North America",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight": "3456",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth": "5184",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge": [
"1000",
"1001"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode": [
"ASPP",
"OTHER_ORG"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName": [
"ASPP",
"Other Org"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage": [
"rocky 1",
"rocky 2"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId": [
"100-ABC-ABC-555",
"11223344",
"55667788"
],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId": [
"PLUS",
"ORG 2"
],
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition": "DAM Architect",
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter": "Ray Gauss II",
"{http://ns.adobe.com/photoshop/1.0/}Category": "PrimaryCategory",
"{http://ns.adobe.com/photoshop/1.0/}City": "Washington",
"{http://ns.adobe.com/photoshop/1.0/}Country": "United States",
"{http://ns.adobe.com/photoshop/1.0/}Credit": "provider",
"{http://ns.adobe.com/photoshop/1.0/}DateCreated": "2011-08-31",
"{http://ns.adobe.com/photoshop/1.0/}Headline": "Rock Creek Park",
"{http://ns.adobe.com/photoshop/1.0/}Instructions": "instructions",
"{http://ns.adobe.com/photoshop/1.0/}Source": "source",
"{http://ns.adobe.com/photoshop/1.0/}State": "DC",
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories": [
"category1",
"category2"
],
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference": "job identifier",
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms": "rights usage terms",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID": "RGAUSS",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName": [
"Ray Gauss II",
"GG"
],
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID": "RGAUSS",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName": [
"Ray Gauss II",
"GG"
],
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID": "RGAUSS",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID": "supplier image ID",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName": "Ray Gauss II",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail": "r@example.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID": "RGAUSS",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName": [
"Ray Gauss II",
"GG"
],
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1": "555-5555",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2": "555-4444",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL": "http://rgauss.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}MinorModelAgeDisclosure": "Age Unknown",
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID": [
"model release id 1",
"model release id 2"
],
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseStatus": "Not Applicable",
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID": [
"prop release id 1",
"prop release id 2"
],
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseStatus": "Not Applicable",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version": "1.2.0",
"{http://purl.org/dc/elements/1.1/}creator": "Ray Gauss II",
"{http://purl.org/dc/elements/1.1/}description": "A stream bank in Rock Creek Park Washington DC during a photo bike tour with ASPP DC/South chapter.",
"{http://purl.org/dc/elements/1.1/}rights": "© Ray Gauss II",
"{http://purl.org/dc/elements/1.1/}subject": [
"bank",
"park",
"rock creek",
"stream",
"washington"
],
"{http://purl.org/dc/elements/1.1/}title": "Downstream",
"{http://www.alfresco.org/model/content/1.0}author": "Ray Gauss II",
"{http://www.alfresco.org/model/content/1.0}created": "2011-08-13T14:40:51",
"{http://www.alfresco.org/model/content/1.0}description": "A stream bank in Rock Creek Park Washington DC during a photo bike tour with ASPP DC/South chapter.",
"{http://www.alfresco.org/model/content/1.0}title": "Downstream",
"{http://www.alfresco.org/model/exif/1.0}dateTimeOriginal": "2011-08-13T14:40:51",
"{http://www.alfresco.org/model/exif/1.0}exposureTime": "0.0125",
"{http://www.alfresco.org/model/exif/1.0}fNumber": "10.0",
"{http://www.alfresco.org/model/exif/1.0}flash": "false",
"{http://www.alfresco.org/model/exif/1.0}focalLength": "50.0",
"{http://www.alfresco.org/model/exif/1.0}isoSpeedRatings": "640",
"{http://www.alfresco.org/model/exif/1.0}manufacturer": "Canon",
"{http://www.alfresco.org/model/exif/1.0}model": "Canon EOS 60D",
"{http://www.alfresco.org/model/exif/1.0}orientation": "1",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension": "103",
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension": "69",
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit": "Inch",
"{http://www.alfresco.org/model/exif/1.0}software": "Adobe Photoshop CS6 (Macintosh)",
"{http://www.alfresco.org/model/exif/1.0}xResolution": "72.0",
"{http://www.alfresco.org/model/exif/1.0}yResolution": "72.0"
}

View File

@ -26,13 +26,15 @@
*/
package org.alfresco.transformer.metadataExtractors;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.tika.parsers.ExifToolParser;
import org.apache.tika.exception.TikaException;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
@ -43,19 +45,24 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class);
private static Set<String> IPTC_DATE_KEYS = Set.of("XMP-photoshop:DateCreated", "XMP-iptcExt:ArtworkDateCreated");
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
private ExifToolParser parser;
/**
 * Creates the extractor, handing this class's logger to the superclass constructor.
 */
public IPTCMetadataExtractor()
{
super(logger);
}
@Override
protected Parser getParser() {
try {
return new ExifToolParser();
} catch (IOException | TikaException e) {
logger.error(e.getMessage(), e);
throw new TransformException(500, "Error creating IPTC parser: " + e.getMessage());
protected Parser getParser()
{
if (this.parser == null) {
this.parser = new ExifToolParser();
}
return this.parser;
}
/**
@ -65,9 +72,87 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
*/
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties,
Map<String, String> headers) {
Map<String, String> headers)
{
// Start from the properties produced by TikaAutoMetadataExtractor, then post-process the String values.
properties = new TikaAutoMetadataExtractor().extractSpecific(metadata, properties, headers);
ExifToolParser etParser = (ExifToolParser)this.getParser();
// Values are only post-processed when the parser reports a separator.
if (etParser.getSeparator()!=null)
{
for (String key : properties.keySet())
{
// Non-String values are left untouched.
if (properties.get(key) instanceof String)
{
String value = (String) properties.get(key);
String separator = etParser.getSeparator();
// A value containing the separator is treated as multi-valued and split into a list.
if (value.contains(separator))
{
// If the separator occurs wrapped in double quotes, split on the quoted form instead.
if (value.contains(String.format("\"%s\"",separator)))
{
separator = String.format("\"%s\"",separator);
}
String [] values = StringUtils.splitByWholeSeparator(value, separator);
// Date properties: convert every split value with iptcToIso8601DateStrings
// (the ':' delimiters of the date part become '-').
if (IPTC_DATE_KEYS.contains(key)){
values = iptcToIso8601DateStrings(values);
}
// NOTE(review): putRawValue is defined outside this view; presumably it stores the list under key - confirm.
putRawValue(key, (Serializable) Arrays.asList(values), properties);
}
else if (IPTC_DATE_KEYS.contains(key)) {
// Handle property with a single date string
putRawValue(key, (Serializable) iptcToIso8601DateString(value), properties);
}
}
}
}
return properties;
}
/**
 * Converts every entry of an array of date or date-time strings into ISO 8601 format. <p>
 * The conversion is done in place: each element of the supplied array is overwritten with
 * its converted form and the same array instance is returned.
 *
 * @param dateStrings the date or date-time strings to convert
 * @return the supplied array, now holding the converted strings
 * @see #iptcToIso8601DateString
 */
protected String[] iptcToIso8601DateStrings(String[] dateStrings)
{
    // Overwrite each slot, in index order, with the converted form of its current content.
    Arrays.setAll(dateStrings, index -> iptcToIso8601DateString(dateStrings[index]));
    return dateStrings;
}
/**
 * Converts a date or date-time string into ISO 8601 format. <p>
 * The first date found in the string, written as YYYY:MM:DD or YYYY-MM-DD, has its ':'
 * delimiters replaced by '-'. If a character directly follows that date and it is not
 * already 'T', that single character is replaced by 'T' so that the date and time parts
 * are delimited correctly. The rest of the string is left untouched. <p>
 * A string that contains no such date is returned unchanged.
 * <p>
 * Examples: <p><ul>
 * <li>"1919:10:16" will convert to "1919-10-16"</li>
 * <li>"1901:02:01 00:00:00.000Z" will convert to "1901-02-01T00:00:00.000Z"</li>
 * <li>"2001:02:01 16:15+00:00" will convert to "2001-02-01T16:15+00:00"</li>
 * <li>"2021-06-11 05:36-01:00" will convert to "2021-06-11T05:36-01:00"</li>
 * <li>"2011:08:13 14:40:51 +01:00" will convert to "2011-08-13T14:40:51 +01:00"</li>
 * </ul>
 * @param dateStr the date or date-time string to convert; may be {@code null}
 * @return dateStr in ISO 8601 format, or {@code null} if dateStr is {@code null}
 */
protected String iptcToIso8601DateString(String dateStr)
{
    final char timeSeparator = 'T';
    if (dateStr == null)
    {
        return null;
    }

    Matcher dateMatcher = YEAR_IPTC.matcher(dateStr);
    if (!dateMatcher.find())
    {
        return dateStr;
    }

    // Work on the matched region only, wherever it sits in the string.
    int dateStart = dateMatcher.start(1);
    int dateEnd = dateMatcher.end(1);
    StringBuilder iso = new StringBuilder(dateStr);
    // Same length before and after, so dateEnd is still the index just past the date.
    iso.replace(dateStart, dateEnd, dateMatcher.group(1).replace(':', '-'));

    // Only the one character between date and time becomes 'T'. A global replace would
    // also rewrite later occurrences of that character, e.g. a space before a zone offset.
    if (dateEnd < iso.length() && iso.charAt(dateEnd) != timeSeparator)
    {
        iso.setCharAt(dateEnd, timeSeparator);
    }
    return iso.toString();
}
}

View File

@ -39,6 +39,7 @@ import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -58,20 +59,40 @@ import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class ExifToolParser extends ExternalParser {
private static final Logger logger = LoggerFactory.getLogger(ExifToolParser.class);
private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml";
public ExifToolParser() throws IOException, TikaException {
protected static final String DEFAULT_SEPARATOR = ", ";
protected static final String SEPARATOR_SETTING = "-sep";
private String separator;
/**
 * Creates the parser and copies its settings (command, ignored-line consumer, metadata
 * extraction patterns and supported types) from the first external parser created from
 * the config resource returned by getExternalParserConfigURL(). <p>
 * If no parser is created from the config, or creating it fails with an IOException or
 * TikaException, an error is logged and none of those settings are applied.
 */
public ExifToolParser() {
super();
ExternalParser eParser = ExternalParsersFactory.create(getExternalParserConfigURL()).get(0);
try {
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
// if ExifTool is not installed then no parsers are returned
if (eParsers.size() > 0) {
// Only the first configured parser is used as the template for this instance.
ExternalParser eParser = eParsers.get(0);
// setCommand is overridden in this class and also sets the separator.
this.setCommand(eParser.getCommand());
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
this.setSupportedTypes(eParser.getSupportedTypes());
} else {
logger.error(
"Error creating ExifToolParser from config, ExifToolExtractions not enabled. Please check ExifTool is installed correctly.");
}
} catch (IOException | TikaException e) {
// Failure is logged rather than rethrown.
logger.error("Error creating ExifToolParser from config, ExifToolExtractions not enabled: ", e);
}
}
private URL getExternalParserConfigURL(){
@ -79,6 +100,43 @@ public class ExifToolParser extends ExternalParser {
return classLoader.getResource(EXIFTOOL_PARSER_CONFIG);
}
/**
 * Sets the separator string held by this parser.
 *
 * @param sep the separator to store; stored as given, without validation
 */
public void setSeparator(String sep) {
this.separator = sep;
}
/**
 * Returns the separator string held by this parser.
 *
 * @return the stored separator; {@code null} if none has been set
 */
public String getSeparator() {
return this.separator;
}
/**
 * Sets the external command and derives the separator from it. <p>
 * When the command is supplied as exactly one string, the separator is looked up in that
 * string with {@link #findSeparator(String)}; in every other case the default separator
 * is used.
 *
 * @param command the command to run, as passed on to the superclass
 */
@Override
public void setCommand(String... command) {
    super.setCommand(command);
    String chosenSeparator = (command.length == 1) ? findSeparator(command[0]) : DEFAULT_SEPARATOR;
    setSeparator(chosenSeparator);
}
/**
 * Extracts the separator that follows the first occurrence of the separator setting
 * ({@code SEPARATOR_SETTING}) in a command string. <p>
 * The value is expected one character after the setting. A value that starts with a
 * double quote runs up to the next double quote; an unquoted value runs up to the next
 * space, or to the end of the command when it is the last token. <p>
 * The default separator ({@code DEFAULT_SEPARATOR}) is returned when the setting is
 * absent, when nothing follows it, or when a quoted value has no closing quote. These
 * malformed cases previously ended in a {@link StringIndexOutOfBoundsException}.
 *
 * @param command the full command line to inspect
 * @return the configured separator, or the default separator if none can be determined
 */
protected String findSeparator(String command) {
    int settingIndex = command.indexOf(SEPARATOR_SETTING);
    if (settingIndex < 0) {
        return DEFAULT_SEPARATOR;
    }

    // Skip the setting itself plus the single character that divides it from its value.
    int valueStart = settingIndex + SEPARATOR_SETTING.length() + 1;
    if (valueStart >= command.length()) {
        // The command ends right after the setting: there is no value to read.
        return DEFAULT_SEPARATOR;
    }

    if (command.charAt(valueStart) == '"') {
        // Quoted value: take all characters up to the next double quote.
        int closingQuote = command.indexOf('"', valueStart + 1);
        return closingQuote < 0 ? DEFAULT_SEPARATOR : command.substring(valueStart + 1, closingQuote);
    }

    // Unquoted value: ends at the next space, or at the end of the command.
    int valueEnd = command.indexOf(' ', valueStart);
    return valueEnd < 0 ? command.substring(valueStart) : command.substring(valueStart, valueEnd);
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}
* due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation. <p>
@ -95,7 +153,9 @@ public class ExifToolParser extends ExternalParser {
TemporaryResources tmp = new TemporaryResources();
try {
TikaInputStream tis = TikaInputStream.get(stream, tmp);
if (this.getSupportedTypes().contains(mediaType)) {
parse(tis, xhtml, metadata, tmp);
}
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
case MIMETYPE_IMAGE_JPEG:
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);

View File

@ -5,7 +5,7 @@
<command>exiftool -ver</command>
<error-codes>126,127</error-codes>
</check>
<command>env FOO=${OUTPUT} exiftool -args -G1 ${INPUT}</command>
<command>env FOO=${OUTPUT} exiftool -args -G1 -sep "|||" ${INPUT}</command>
<mime-types>
<mime-type>image/x-raw-hasselblad</mime-type>
<mime-type>image/x-raw-sony</mime-type>

View File

@ -0,0 +1,48 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import org.junit.jupiter.api.Test;
/**
 * Unit test for the date conversion helpers of {@link IPTCMetadataExtractor}.
 */
public class IPTCMetadataExtractorTest {

    IPTCMetadataExtractor extractor = new IPTCMetadataExtractor();

    /**
     * Checks that IPTC style dates and date-times, with ':' or '-' date delimiters and with
     * either a space or 'T' before the time, are all converted to ISO 8601 strings.
     */
    @Test
    public void testIptcToIso8601DateStrings() {
        String[] iptcDates = new String[] {
                "1890:01:01",
                "1901:02:01 00:00:00.000Z",
                "1901-02-01 00:00:00.000Z",
                "1901-02-01T00:00:00.000Z",
                "1901:02:01T00:00+00:00",
                "1901:02:01 00:00+00:00" };
        String[] isoDates = new String[] {
                "1890-01-01",
                "1901-02-01T00:00:00.000Z",
                "1901-02-01T00:00:00.000Z",
                "1901-02-01T00:00:00.000Z",
                "1901-02-01T00:00+00:00",
                "1901-02-01T00:00+00:00" };

        String[] converted = extractor.iptcToIso8601DateStrings(iptcDates);

        assertArrayEquals(isoDates, converted);
    }
}

View File

@ -0,0 +1,59 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
/**
 * Unit test for the separator detection of {@link ExifToolParser}.
 */
public class ExifToolParserTest {

    ExifToolParser exifToolParser = new ExifToolParser();

    /**
     * Checks that the separator is read from a command string when it is quoted and
     * followed by more arguments, when it is unquoted, and when it is a quoted value
     * containing spaces and symbols at the very end of the command.
     */
    @Test
    public void testFindSeparator() {
        // Quoted separator in a realistic ExifTool command.
        final String quotedSeparator = "|||";
        final String realisticCommand = "env FOO=${OUTPUT} exiftool -args -G1 " + ExifToolParser.SEPARATOR_SETTING
                + " \"|||\" ${INPUT}";
        assertEquals(quotedSeparator, exifToolParser.findSeparator(realisticCommand));

        // Unquoted separator: everything after the following space must be ignored.
        final String unquotedSeparator = "TESTWITHOUTQUOTES";
        final String unquotedCommand = "nothing matters until the " + ExifToolParser.SEPARATOR_SETTING + " "
                + unquotedSeparator + " now all this extra should be ignored";
        assertEquals(unquotedSeparator, exifToolParser.findSeparator(unquotedCommand));

        // Quoted separator containing spaces and symbols, with nothing after the closing quote.
        final String oddSeparator = "Test something bonkers 112!£$%^£$^";
        final String oddCommand = ExifToolParser.SEPARATOR_SETTING + " \"" + oddSeparator + "\"";
        assertEquals(oddSeparator, exifToolParser.findSeparator(oddCommand));
    }
}