mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-05-12 17:04:48 +00:00
ATS-892 Convert ExifTool separated strings into collections for ACS consumption (#397)
ATS-911 Add regex pattern matching for date replacement
This commit is contained in:
parent
d25e3c365a
commit
e11cbd5180
@ -100,8 +100,10 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
|
||||
|
||||
return Stream.of(
|
||||
//IPTCMetadataExtractor
|
||||
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
|
||||
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-EXT.jpg"),
|
||||
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-multi-creator.jpg"),
|
||||
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "testJPEG_IPTC_EXT.jpg"),
|
||||
testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
|
||||
testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
|
||||
testFile(MIMETYPE_IMAGE_RAW_RAF, "raf", "quick.raf"),
|
||||
|
@ -9,7 +9,7 @@
|
||||
"{http://purl.org/dc/elements/1.1/}description" : "Gym class featuring a brown fox and lazy dog",
|
||||
"{http://purl.org/dc/elements/1.1/}creator" : "Nevin Nollop",
|
||||
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
|
||||
"{http://purl.org/dc/elements/1.1/}subject" : "Pangram, fox, dog",
|
||||
"{http://purl.org/dc/elements/1.1/}subject" : [ "Pangram", "fox", "dog" ],
|
||||
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
|
||||
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "50.0",
|
||||
"{http://www.alfresco.org/model/exif/1.0}xResolution" : "50.0"
|
||||
|
@ -7,7 +7,7 @@
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "RGAUSS",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "United Kingdom",
|
||||
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "1885:03:14",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "1885-03-14",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "The Gym",
|
||||
"{http://purl.org/dc/elements/1.1/}description" : "Gym class featuring a brown fox and lazy dog",
|
||||
"{http://purl.org/dc/elements/1.1/}creator" : "Nevin Nollop",
|
||||
@ -21,7 +21,7 @@
|
||||
"{http://www.alfresco.org/model/exif/1.0}software" : "Adobe Photoshop CC (Macintosh)",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.2.0",
|
||||
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
|
||||
"{http://purl.org/dc/elements/1.1/}subject" : "fox, dog, lazy, jumping",
|
||||
"{http://purl.org/dc/elements/1.1/}subject" : [ "fox", "dog", "lazy", "jumping" ],
|
||||
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
|
||||
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "1.0",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The Dog",
|
||||
|
@ -9,7 +9,7 @@
|
||||
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "The Gym",
|
||||
"{http://purl.org/dc/elements/1.1/}description" : "Gym class featuring a brown fox and lazy dog",
|
||||
"{http://purl.org/dc/elements/1.1/}creator" : "John Smith, Jane Doe",
|
||||
"{http://purl.org/dc/elements/1.1/}creator" : [ "John Smith", "Jane Doe" ],
|
||||
"{http://www.alfresco.org/model/exif/1.0}xResolution" : "1.0",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Derek Hulley",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Maidenhead",
|
||||
@ -20,7 +20,7 @@
|
||||
"{http://www.alfresco.org/model/exif/1.0}software" : "Adobe Photoshop CC (Macintosh)",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.2.0",
|
||||
"{http://www.alfresco.org/model/exif/1.0}orientation" : "1",
|
||||
"{http://purl.org/dc/elements/1.1/}subject" : "fox, dog, lazy, jumping",
|
||||
"{http://purl.org/dc/elements/1.1/}subject" : [ "fox", "dog", "lazy", "jumping" ],
|
||||
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit" : "Inch",
|
||||
"{http://www.alfresco.org/model/exif/1.0}yResolution" : "1.0",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The Dog",
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 27 KiB |
@ -0,0 +1,166 @@
|
||||
{
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity": "Atlanta",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr": "1234 Some Road",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode": "30339",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion": "GA",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork": "info@alfresco.com.other@example.com",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork": "555-1234.555-4321",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork": "http://alfresco.com.http://example.com",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode": "US",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre": "intellectual genre",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location": "Rock Creek Park",
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene": [
|
||||
"iptc scene 1",
|
||||
"iptc scene 2"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode": [
|
||||
"iptc subject code 1",
|
||||
"iptc subject code 2"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice": "Ray Gauss II",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator": [
|
||||
"Mother Nature",
|
||||
"Man",
|
||||
"Mother Nature"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated": [
|
||||
"1890-01-01",
|
||||
"1901-02-01"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource": "National Park Service",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo": [
|
||||
"123456",
|
||||
"654321"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle": [
|
||||
"Rock Creek Stream Bank",
|
||||
"Pollution",
|
||||
"Some Tree"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo": "rocky 1 and rocky 2 are big",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType": "http://cv.iptc.org/newscodes/digitalsourcetype/digitalCapture",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event": "Photo Bike Tour",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity": "Washington",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode": "US",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName": "United States",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState": "D.C.",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation": "Rock Creek Park",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion": "North America",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity": "Washington",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode": "US",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName": "United States",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState": "D.C.",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation": [
|
||||
"Rock Creek Park Sub",
|
||||
"Stream Section"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion": "North America",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight": "3456",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth": "5184",
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge": [
|
||||
"1000",
|
||||
"1001"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode": [
|
||||
"ASPP",
|
||||
"OTHER_ORG"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName": [
|
||||
"ASPP",
|
||||
"Other Org"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage": [
|
||||
"rocky 1",
|
||||
"rocky 2"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId": [
|
||||
"100-ABC-ABC-555",
|
||||
"11223344",
|
||||
"55667788"
|
||||
],
|
||||
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId": [
|
||||
"PLUS",
|
||||
"ORG 2"
|
||||
],
|
||||
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition": "DAM Architect",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter": "Ray Gauss II",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}Category": "PrimaryCategory",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}City": "Washington",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}Country": "United States",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}Credit": "provider",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}DateCreated": "2011-08-31",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}Headline": "Rock Creek Park",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}Instructions": "instructions",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}Source": "source",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}State": "DC",
|
||||
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories": [
|
||||
"category1",
|
||||
"category2"
|
||||
],
|
||||
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference": "job identifier",
|
||||
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms": "rights usage terms",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID": "RGAUSS",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName": [
|
||||
"Ray Gauss II",
|
||||
"GG"
|
||||
],
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID": "RGAUSS",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName": [
|
||||
"Ray Gauss II",
|
||||
"GG"
|
||||
],
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID": "RGAUSS",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID": "supplier image ID",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName": "Ray Gauss II",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail": "r@example.com",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID": "RGAUSS",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName": [
|
||||
"Ray Gauss II",
|
||||
"GG"
|
||||
],
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1": "555-5555",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2": "555-4444",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL": "http://rgauss.com",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}MinorModelAgeDisclosure": "Age Unknown",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID": [
|
||||
"model release id 1",
|
||||
"model release id 2"
|
||||
],
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseStatus": "Not Applicable",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID": [
|
||||
"prop release id 1",
|
||||
"prop release id 2"
|
||||
],
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseStatus": "Not Applicable",
|
||||
"{http://ns.useplus.org/ldf/xmp/1.0/}Version": "1.2.0",
|
||||
"{http://purl.org/dc/elements/1.1/}creator": "Ray Gauss II",
|
||||
"{http://purl.org/dc/elements/1.1/}description": "A stream bank in Rock Creek Park Washington DC during a photo bike tour with ASPP DC/South chapter.",
|
||||
"{http://purl.org/dc/elements/1.1/}rights": "© Ray Gauss II",
|
||||
"{http://purl.org/dc/elements/1.1/}subject": [
|
||||
"bank",
|
||||
"park",
|
||||
"rock creek",
|
||||
"stream",
|
||||
"washington"
|
||||
],
|
||||
"{http://purl.org/dc/elements/1.1/}title": "Downstream",
|
||||
"{http://www.alfresco.org/model/content/1.0}author": "Ray Gauss II",
|
||||
"{http://www.alfresco.org/model/content/1.0}created": "2011-08-13T14:40:51",
|
||||
"{http://www.alfresco.org/model/content/1.0}description": "A stream bank in Rock Creek Park Washington DC during a photo bike tour with ASPP DC/South chapter.",
|
||||
"{http://www.alfresco.org/model/content/1.0}title": "Downstream",
|
||||
"{http://www.alfresco.org/model/exif/1.0}dateTimeOriginal": "2011-08-13T14:40:51",
|
||||
"{http://www.alfresco.org/model/exif/1.0}exposureTime": "0.0125",
|
||||
"{http://www.alfresco.org/model/exif/1.0}fNumber": "10.0",
|
||||
"{http://www.alfresco.org/model/exif/1.0}flash": "false",
|
||||
"{http://www.alfresco.org/model/exif/1.0}focalLength": "50.0",
|
||||
"{http://www.alfresco.org/model/exif/1.0}isoSpeedRatings": "640",
|
||||
"{http://www.alfresco.org/model/exif/1.0}manufacturer": "Canon",
|
||||
"{http://www.alfresco.org/model/exif/1.0}model": "Canon EOS 60D",
|
||||
"{http://www.alfresco.org/model/exif/1.0}orientation": "1",
|
||||
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension": "103",
|
||||
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension": "69",
|
||||
"{http://www.alfresco.org/model/exif/1.0}resolutionUnit": "Inch",
|
||||
"{http://www.alfresco.org/model/exif/1.0}software": "Adobe Photoshop CS6 (Macintosh)",
|
||||
"{http://www.alfresco.org/model/exif/1.0}xResolution": "72.0",
|
||||
"{http://www.alfresco.org/model/exif/1.0}yResolution": "72.0"
|
||||
}
|
@ -26,13 +26,15 @@
|
||||
*/
|
||||
package org.alfresco.transformer.metadataExtractors;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.alfresco.transform.exceptions.TransformException;
|
||||
import org.alfresco.transformer.tika.parsers.ExifToolParser;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.slf4j.Logger;
|
||||
@ -43,19 +45,24 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class);
|
||||
|
||||
private static Set<String> IPTC_DATE_KEYS = Set.of("XMP-photoshop:DateCreated", "XMP-iptcExt:ArtworkDateCreated");
|
||||
|
||||
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
|
||||
|
||||
private ExifToolParser parser;
|
||||
|
||||
public IPTCMetadataExtractor()
|
||||
{
|
||||
super(logger);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Parser getParser() {
|
||||
try {
|
||||
return new ExifToolParser();
|
||||
} catch (IOException | TikaException e) {
|
||||
logger.error(e.getMessage(), e);
|
||||
throw new TransformException(500, "Error creating IPTC parser: " + e.getMessage());
|
||||
protected Parser getParser()
|
||||
{
|
||||
if (this.parser == null) {
|
||||
this.parser = new ExifToolParser();
|
||||
}
|
||||
return this.parser;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -65,9 +72,87 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
|
||||
*/
|
||||
@Override
|
||||
protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties,
|
||||
Map<String, String> headers) {
|
||||
|
||||
Map<String, String> headers)
|
||||
{
|
||||
properties = new TikaAutoMetadataExtractor().extractSpecific(metadata, properties, headers);
|
||||
ExifToolParser etParser = (ExifToolParser)this.getParser();
|
||||
if (etParser.getSeparator()!=null)
|
||||
{
|
||||
for (String key : properties.keySet())
|
||||
{
|
||||
if (properties.get(key) instanceof String)
|
||||
{
|
||||
String value = (String) properties.get(key);
|
||||
String separator = etParser.getSeparator();
|
||||
if (value.contains(separator))
|
||||
{
|
||||
if (value.contains(String.format("\"%s\"",separator)))
|
||||
{
|
||||
separator = String.format("\"%s\"",separator);
|
||||
}
|
||||
String [] values = StringUtils.splitByWholeSeparator(value, separator);
|
||||
// Change dateTime format. MM converted ':' to '-'
|
||||
if (IPTC_DATE_KEYS.contains(key)){
|
||||
values = iptcToIso8601DateStrings(values);
|
||||
}
|
||||
putRawValue(key, (Serializable) Arrays.asList(values), properties);
|
||||
}
|
||||
else if (IPTC_DATE_KEYS.contains(key)) {
|
||||
// Handle property with a single date string
|
||||
putRawValue(key, (Serializable) iptcToIso8601DateString(value), properties);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a date or date time strings into Iso8601 format <p>
|
||||
*
|
||||
* @param dateStrings
|
||||
* @return dateStrings in Iso8601 format
|
||||
* @see #iptcToIso8601DateString
|
||||
*/
|
||||
protected String[] iptcToIso8601DateStrings(String[] dateStrings)
|
||||
{
|
||||
for (int i = 0; i < dateStrings.length; i++)
|
||||
{
|
||||
dateStrings[i] = iptcToIso8601DateString(dateStrings[i]);
|
||||
}
|
||||
return dateStrings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a date or date time string into Iso8601 format <p>
|
||||
* Converts any ':' in the year portion of a date string characters to '-'. <p>
|
||||
* Expects the year in the format YYYY:MM:DD or YYYY-MM-DD <p>
|
||||
* Will add the correct delimiter, 'T', to any dateTime strings, where | can be any char other than ,'T':
|
||||
* YYYY:MM:DD|HH:mm:ss.... or YYYY-MM-DD|HH:mm:ss....
|
||||
* <p>
|
||||
* Examples: <p><ul>
|
||||
* <li>"1919:10:16" will convert to "1919-10-16"</li>
|
||||
* <li>"1901:02:01 00:00:00.000Z" will convert to "1901-02-01T00:00:00.000Z"</li>
|
||||
* <li>"2001:02:01 16:15+00:00" will convert to "2001-02-01T16:15+00:00"</li>
|
||||
* <li>"2021-06-11 05:36-01:00" will convert to "2021-06-11T05:36-01:00"</li>
|
||||
* </ul>
|
||||
* @param dateStr
|
||||
* @return dateStr in Iso8601 format
|
||||
*/
|
||||
protected String iptcToIso8601DateString(String dateStr)
|
||||
{
|
||||
char timeSeparator = 'T';
|
||||
Matcher yearMatcher = YEAR_IPTC.matcher(dateStr);
|
||||
if (yearMatcher.find())
|
||||
{
|
||||
String year = yearMatcher.group(1);
|
||||
dateStr = yearMatcher.replaceFirst(year.replaceAll(":", "-"));
|
||||
if (dateStr.length()>year.length() && dateStr.charAt(year.length())!=timeSeparator)
|
||||
{
|
||||
dateStr = dateStr.replace(dateStr.charAt(year.length()), timeSeparator);
|
||||
}
|
||||
}
|
||||
return dateStr;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Reader;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -58,20 +59,40 @@ import org.apache.tika.parser.image.ImageParser;
|
||||
import org.apache.tika.parser.image.TiffParser;
|
||||
import org.apache.tika.parser.jpeg.JpegParser;
|
||||
import org.apache.tika.sax.XHTMLContentHandler;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class ExifToolParser extends ExternalParser {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExifToolParser.class);
|
||||
|
||||
private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml";
|
||||
|
||||
public ExifToolParser() throws IOException, TikaException {
|
||||
protected static final String DEFAULT_SEPARATOR = ", ";
|
||||
protected static final String SEPARATOR_SETTING = "-sep";
|
||||
|
||||
private String separator;
|
||||
|
||||
public ExifToolParser() {
|
||||
super();
|
||||
ExternalParser eParser = ExternalParsersFactory.create(getExternalParserConfigURL()).get(0);
|
||||
try {
|
||||
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
|
||||
// if ExifTool is not installed then no parsers are returned
|
||||
if (eParsers.size() > 0) {
|
||||
ExternalParser eParser = eParsers.get(0);
|
||||
this.setCommand(eParser.getCommand());
|
||||
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
|
||||
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
|
||||
this.setSupportedTypes(eParser.getSupportedTypes());
|
||||
} else {
|
||||
logger.error(
|
||||
"Error creating ExifToolParser from config, ExifToolExtractions not enabled. Please check ExifTool is installed correctly.");
|
||||
}
|
||||
} catch (IOException | TikaException e) {
|
||||
logger.error("Error creating ExifToolParser from config, ExifToolExtractions not enabled: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
private URL getExternalParserConfigURL(){
|
||||
@ -79,6 +100,43 @@ public class ExifToolParser extends ExternalParser {
|
||||
return classLoader.getResource(EXIFTOOL_PARSER_CONFIG);
|
||||
}
|
||||
|
||||
public void setSeparator(String sep) {
|
||||
this.separator = sep;
|
||||
}
|
||||
|
||||
public String getSeparator() {
|
||||
return this.separator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCommand(String... command){
|
||||
super.setCommand(command);
|
||||
if (command.length==1) {
|
||||
setSeparator(findSeparator(command[0]));
|
||||
}
|
||||
else {
|
||||
setSeparator(DEFAULT_SEPARATOR);
|
||||
}
|
||||
}
|
||||
|
||||
protected String findSeparator(String command) {
|
||||
if (command.contains(SEPARATOR_SETTING)) {
|
||||
int start = command.indexOf(SEPARATOR_SETTING)+SEPARATOR_SETTING.length()+1;
|
||||
String separator = DEFAULT_SEPARATOR;
|
||||
if (command.charAt(start)=='\"') {
|
||||
//get all chars up to the next \"
|
||||
int end = command.indexOf("\"", start+1);
|
||||
separator = command.substring(start+1, end);
|
||||
}
|
||||
else {
|
||||
int end = command.indexOf(" ", start);
|
||||
separator = command.substring(start, end);
|
||||
}
|
||||
return separator;
|
||||
}
|
||||
return DEFAULT_SEPARATOR;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}
|
||||
* due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation. <p>
|
||||
@ -95,7 +153,9 @@ public class ExifToolParser extends ExternalParser {
|
||||
TemporaryResources tmp = new TemporaryResources();
|
||||
try {
|
||||
TikaInputStream tis = TikaInputStream.get(stream, tmp);
|
||||
if (this.getSupportedTypes().contains(mediaType)) {
|
||||
parse(tis, xhtml, metadata, tmp);
|
||||
}
|
||||
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
|
||||
case MIMETYPE_IMAGE_JPEG:
|
||||
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
|
||||
|
@ -5,7 +5,7 @@
|
||||
<command>exiftool -ver</command>
|
||||
<error-codes>126,127</error-codes>
|
||||
</check>
|
||||
<command>env FOO=${OUTPUT} exiftool -args -G1 ${INPUT}</command>
|
||||
<command>env FOO=${OUTPUT} exiftool -args -G1 -sep "|||" ${INPUT}</command>
|
||||
<mime-types>
|
||||
<mime-type>image/x-raw-hasselblad</mime-type>
|
||||
<mime-type>image/x-raw-sony</mime-type>
|
||||
|
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2021 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.metadataExtractors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class IPTCMetadataExtractorTest {
|
||||
|
||||
IPTCMetadataExtractor extractor = new IPTCMetadataExtractor();
|
||||
|
||||
@Test
|
||||
public void testIptcToIso8601DateStrings() {
|
||||
String[] testStrings = { "1890:01:01", "1901:02:01 00:00:00.000Z", "1901-02-01 00:00:00.000Z",
|
||||
"1901-02-01T00:00:00.000Z", "1901:02:01T00:00+00:00", "1901:02:01 00:00+00:00" };
|
||||
String[] expected = { "1890-01-01", "1901-02-01T00:00:00.000Z", "1901-02-01T00:00:00.000Z",
|
||||
"1901-02-01T00:00:00.000Z", "1901-02-01T00:00+00:00", "1901-02-01T00:00+00:00" };
|
||||
|
||||
assertArrayEquals(expected, extractor.iptcToIso8601DateStrings(testStrings));
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2021 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.tika.parsers;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class ExifToolParserTest {
|
||||
|
||||
ExifToolParser exifToolParser = new ExifToolParser();
|
||||
|
||||
@Test
|
||||
public void testFindSeparator() {
|
||||
|
||||
String testCommand = "env FOO=${OUTPUT} exiftool -args -G1 " + ExifToolParser.SEPARATOR_SETTING
|
||||
+ " \"|||\" ${INPUT}";
|
||||
String expected = "|||";
|
||||
String actual = exifToolParser.findSeparator(testCommand);
|
||||
assertEquals(expected, actual);
|
||||
|
||||
expected = "TESTWITHOUTQUOTES";
|
||||
testCommand = "nothing matters until the " + ExifToolParser.SEPARATOR_SETTING + " " + expected
|
||||
+ " now all this extra should be ignored";
|
||||
actual = exifToolParser.findSeparator(testCommand);
|
||||
assertEquals(expected, actual);
|
||||
|
||||
expected = "Test something bonkers 112!£$%^£$^";
|
||||
testCommand = ExifToolParser.SEPARATOR_SETTING + " \""+expected+"\"";
|
||||
actual = exifToolParser.findSeparator(testCommand);
|
||||
assertEquals(expected, actual);
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user