Save point: Simpler project structure for core t-engines

This commit is contained in:
alandavis
2022-07-03 11:41:07 +01:00
parent c44ff5016a
commit 0eb8d9e142
538 changed files with 1182 additions and 1756 deletions

View File

@@ -0,0 +1,75 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import org.alfresco.transform.base.TransformEngine;
import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.alfresco.transform.common.TransformConfigResourceReader;
import org.alfresco.transform.config.TransformConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.Collections;
import static org.alfresco.transform.base.logging.StandardMessages.COMMUNITY_LICENCE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
 * Describes the Tika based t-engine: its name, the licence text shown at start up, where its
 * transform configuration is read from and the transform used by the liveness/readiness probes.
 */
@Component
public class TikaTransformEngine implements TransformEngine
{
    /** Name reported by {@link #getTransformEngineName()}. */
    private static final String ENGINE_NAME = "0010-Tika";

    /** Resource location handed to the {@link TransformConfigResourceReader}. */
    private static final String ENGINE_CONFIG_LOCATION = "classpath:tika_engine_config.json";

    @Autowired
    private TransformConfigResourceReader transformConfigResourceReader;

    /**
     * @return the fixed name of this t-engine
     */
    @Override
    public String getTransformEngineName()
    {
        return ENGINE_NAME;
    }

    /**
     * @return the community licence text followed by the Apache Tika and ExifTool licence notices
     */
    @Override
    public String getStartupMessage()
    {
        final String tikaNotice =
            "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n";
        final String exifToolNotice =
            "This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt";
        return COMMUNITY_LICENCE + tikaNotice + exifToolNotice;
    }

    /**
     * @return the transform configuration read from the engine's JSON resource on the classpath
     */
    @Override
    public TransformConfig getTransformConfig()
    {
        return transformConfigResourceReader.read(ENGINE_CONFIG_LOCATION);
    }

    /**
     * Builds the probe transform: a PDF to plain text transform of {@code quick.pdf} into
     * {@code quick.txt} with no transform options.
     * NOTE(review): the six trailing numeric arguments are passed positionally; their meaning is
     * defined by {@link ProbeTestTransform} and should be confirmed there.
     *
     * @return the probe test transform for this engine
     */
    @Override
    public ProbeTestTransform getLivenessAndReadinessProbeTestTransform()
    {
        final String sourceFilename = "quick.pdf";
        final String targetFilename = "quick.txt";
        return new ProbeTestTransform(sourceFilename, targetFilename,
            MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, Collections.emptyMap(),
            60, 16, 400, 10240, 60 * 30 + 1, 60 * 15 + 20);
    }
}

View File

@@ -0,0 +1,525 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.alfresco.transform.common.TransformException;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import org.joda.time.format.DateTimeParser;
import org.slf4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common mappings.
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>subject:</b> -- cm:description
* <b>created:</b> -- cm:created
* <b>comments:</b>
* </pre>
*
* @author Nick Burch
* @author adavis
*/
public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer
{
protected static final String KEY_AUTHOR = "author";
protected static final String KEY_TITLE = "title";
protected static final String KEY_SUBJECT = "subject";
protected static final String KEY_CREATED = "created";
protected static final String KEY_DESCRIPTION = "description";
protected static final String KEY_COMMENTS = "comments";
protected static final String KEY_TAGS = DublinCore.SUBJECT.getName();
private static final String METADATA_SEPARATOR = ",";
private final DateTimeFormatter tikaUTCDateFormater;
private final DateTimeFormatter tikaDateFormater;
/**
 * Creates the extractor and builds the two composite date formatters used by
 * {@link #makeDate(String)}: one for timestamps that carry a zone designator (interpreted with
 * the formatter zone set to UTC) and one for timestamps without an ISO style zone designator.
 *
 * @param type passed unchanged to the superclass constructor
 * @param logger passed unchanged to the superclass constructor
 */
public AbstractTikaMetadataExtractor(Type type, Logger logger)
{
    super(type, logger);
    // TODO Once TIKA-451 is fixed this list will get nicer
    DateTimeParser[] parsersUTC = {
        DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").getParser(),
        DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZ").getParser()
    };
    DateTimeParser[] parsers = {
        DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss").getParser(),
        DateTimeFormat.forPattern("yyyy-MM-dd").getParser(),
        DateTimeFormat.forPattern("yyyy/MM/dd HH:mm:ss").getParser(),
        DateTimeFormat.forPattern("yyyy/MM/dd").getParser(),
        // 'HH' (hour of day, 0-23) rather than 'hh' (clock hour of half day, 1-12): this layout
        // has no am/pm marker, so 'hh' rejected afternoon times and read 12:xx as 00:xx.
        DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss zzz yyyy").getParser()
    };
    // A null printer is supplied: these formatters are only ever used for parsing.
    tikaUTCDateFormater = new DateTimeFormatterBuilder().append(null, parsersUTC).toFormatter().withZone(DateTimeZone.UTC);
    tikaDateFormater = new DateTimeFormatterBuilder().append(null, parsers).toFormatter();
}
/**
 * Attempts to turn a date string produced by Tika into a {@link java.util.Date}.
 * The formats are tried in a fixed order: the zone-aware formatter, the same formatter with the
 * US locale, the zone-less formatter and finally the zone-less formatter with the US locale.
 *
 * @param dateStr the textual date to convert
 * @return the parsed date, or {@code dateStr} itself when none of the formats match
 */
protected Serializable makeDate(String dateStr)
{
    final DateTimeFormatter[] attemptsInOrder = {
        this.tikaUTCDateFormater,
        this.tikaUTCDateFormater.withLocale(Locale.US),
        this.tikaDateFormater,
        this.tikaDateFormater.withLocale(Locale.US)
    };
    for (DateTimeFormatter attempt : attemptsInOrder)
    {
        try
        {
            return attempt.parseDateTime(dateStr).toDate();
        }
        catch (IllegalArgumentException ignore)
        {
            // Not in this format: fall through to the next one.
        }
    }
    // Fall back to the normal ones: We just return the String as AbstractMappingMetadataExtracter
    // convertSystemPropertyValues in the repo will do the conversion that was previously done here.
    return dateStr;
}
/**
* Returns the Tika {@link Parser} that {@code extractMetadata} runs against the source file.
* If you don't know which you want, use {@link TikaAutoMetadataExtractor}
* which makes use of the Tika auto-detection.
*
* @return the parser used to process the document
*/
protected abstract Parser getParser();
/**
* Returns the Tika Embedder used to modify the document.
* This default implementation returns {@code null}; the file based {@code embedMetadata}
* method returns without writing anything when no embedder is supplied.
*
* @return the Tika embedder, or {@code null} when embedding is not supported
*/
protected Embedder getEmbedder()
{
// TODO make this an abstract method once more extracters support embedding
return null;
}
/**
* Do we care about the contents of the extracted header, or nothing at all?
* When this returns {@code true}, {@code extractMetadata} captures the XHTML head tags and
* attributes and hands them to {@code extractSpecific}; otherwise all content is ignored and
* the headers passed on are {@code null}.
*
* @return {@code false} by default
*/
protected boolean needHeaderContents()
{
return false;
}
/**
* Allows implementation specific mappings to be done. It is called by {@code extractMetadata}
* after the common mappings have been added, and its return value becomes the result.
* This default implementation adds nothing.
*
* @param metadata the Tika metadata produced by the parser
* @param properties the raw properties gathered so far
* @param headers the captured header tags, or {@code null} when {@code needHeaderContents()} is false
* @return the properties to return from the extraction; by default the supplied {@code properties}
*/
protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers)
{
return properties;
}
/**
* Gets the document selector, used for determining whether to parse embedded resources,
* null by default so parse all.
* NOTE(review): the only caller in this class, {@code buildParseContext}, passes the source
* mimetype as the second argument even though the parameter is named {@code targetMimeType}.
*
* @param metadata the Tika metadata that will be used for the parse
* @param targetMimeType the mimetype supplied by the caller
* @return the selector to register in the parse context, or {@code null} for none
*/
protected DocumentSelector getDocumentSelector(Metadata metadata, String targetMimeType)
{
return null;
}
/**
 * Creates a new {@link ParseContext}, registering the {@link DocumentSelector} supplied by
 * {@link #getDocumentSelector(Metadata, String)} when there is one.
 *
 * @param metadata the metadata handed to the document selector lookup
 * @param sourceMimeType the mimetype handed to the document selector lookup
 * @return a fresh parse context
 */
private ParseContext buildParseContext(Metadata metadata, String sourceMimeType)
{
    final ParseContext parseContext = new ParseContext();
    final DocumentSelector documentSelector = getDocumentSelector(metadata, sourceMimeType);
    if (documentSelector == null)
    {
        // Nothing to register: the context is returned empty.
        return parseContext;
    }
    parseContext.set(DocumentSelector.class, documentSelector);
    return parseContext;
}
/**
 * Parses the source file with the parser from {@link #getParser()} and builds the raw property
 * map: first every Tika key, then the common Alfresco keys (author, title, comments, tags,
 * description, subject, created) and finally whatever {@code extractSpecific} adds or changes.
 *
 * @param sourceMimetype added to the Tika metadata as the content type before parsing
 * @param transformOptions not read by this implementation
 * @param sourceFile the file to parse
 * @return the raw properties returned by {@code extractSpecific}
 * @throws Exception if the file cannot be read or the parser fails
 */
@Override
public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
    File sourceFile) throws Exception
{
    try (InputStream sourceStream = new FileInputStream(sourceFile))
    {
        final Parser parser = getParser();
        final Metadata metadata = new Metadata();
        metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
        final ParseContext context = buildParseContext(metadata, sourceMimetype);

        // Either capture the XHTML head entries, or throw all content away.
        final boolean captureHeaders = needHeaderContents();
        final MapCaptureContentHandler headerCapture = captureHeaders ? new MapCaptureContentHandler() : null;
        final Map<String, String> headers = captureHeaders ? headerCapture.tags : null;
        final ContentHandler handler = captureHeaders
            ? new HeadContentHandler(headerCapture)
            : new NullContentHandler();

        parser.parse(sourceStream, handler, metadata, context);

        final Map<String, Serializable> rawProperties = new HashMap<>();

        // Copy every Tika key across so that any of them can be mapped onto a content model.
        for (String tikaKey : metadata.names())
        {
            // TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
            String tikaValue = getMetadataValue(metadata, Property.internalText(tikaKey));
            putRawValue(tikaKey, tikaValue, rawProperties);
        }

        // Map the common Tika keys onto the common Alfresco keys, so that existing mapping
        // properties files keep working without changes. The simple ones first.
        putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties);
        putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties);
        putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties);

        // Tags
        putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);

        // Subject and description stand in for each other when only one of them is present.
        final String subject = getMetadataValue(metadata, TikaCoreProperties.SUBJECT);
        final String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION);
        if (subject != null || description != null)
        {
            putRawValue(KEY_DESCRIPTION, description != null ? description : subject, rawProperties);
            putRawValue(KEY_SUBJECT, subject != null ? subject : description, rawProperties);
        }

        // The created date falls back to the modified date.
        String created = metadata.get(TikaCoreProperties.CREATED);
        if (created == null)
        {
            created = metadata.get(TikaCoreProperties.MODIFIED);
        }
        if (created != null)
        {
            putRawValue(KEY_CREATED, created, rawProperties);
        }

        // Let a specific instance (eg OfficeMetadataExtractor) map the Tika keys onto its
        // existing namespace so that older properties files continue to map correctly.
        return extractSpecific(metadata, rawProperties, headers);
    }
}
/**
* Stream based metadata embedding, which is not implemented yet.
* None of the arguments are read.
*
* @throws TransformException always, with status code 500
*/
public void embedMetadata(String sourceMimetype, Map<String, String> transformOptions,
String sourceEncoding, InputStream inputStream,
String targetEncoding, OutputStream outputStream) throws Exception
{
// TODO
throw new TransformException(500, "TODO embedMetadata");
}
/**
 * Embeds the metadata found in the transform options into a copy of the source file, using the
 * embedder from {@link #getEmbedder()}. Nothing is read or written when there is no embedder.
 *
 * @param sourceMimetype not read by this implementation
 * @param targetMimetype not read by this implementation
 * @param transformOptions the options from which the metadata to embed is obtained
 * @param sourceFile the file to read the original content from
 * @param targetFile the file the modified content is written to
 * @throws Exception if either file cannot be opened or the embedder fails
 * @deprecated The content repository's TikaPoweredMetadataExtracter provides no non test implementations.
 *             This code exists in case there are custom implementations, that need to be converted to T-Engines.
 *             It is simply a copy and paste from the content repository and has received limited testing.
 */
@Override
public void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
    File sourceFile, File targetFile) throws Exception
{
    final Embedder embedder = getEmbedder();
    if (embedder != null)
    {
        final Metadata metadataToEmbed = getTikaMetadata(transformOptions);
        try (InputStream source = new FileInputStream(sourceFile);
             OutputStream target = new FileOutputStream(targetFile))
        {
            embedder.embed(metadataToEmbed, source, target, null);
        }
    }
}
/**
 * Converts the metadata held in the transform options into a Tika {@link Metadata} object.
 * Null values are skipped, collections contribute one entry per element, and any value that
 * is not a {@code String} is logged at info level and left out.
 *
 * @param transformOptions the options from which {@code getMetadata} obtains the properties
 * @return the metadata to embed
 */
private Metadata getTikaMetadata(Map<String, String> transformOptions)
{
    final Metadata metadataToEmbed = new Metadata();
    final Map<String, Serializable> properties = getMetadata(transformOptions);
    for (Map.Entry<String, Serializable> property : properties.entrySet())
    {
        final String metadataKey = property.getKey();
        final Serializable value = property.getValue();
        if (value == null)
        {
            continue;
        }
        if (value instanceof Collection<?>)
        {
            for (Object singleValue : (Collection<?>) value)
            {
                addAsString(metadataToEmbed, metadataKey, singleValue);
            }
        }
        else
        {
            addAsString(metadataToEmbed, metadataKey, value);
        }
    }
    return metadataToEmbed;
}

/**
 * Adds one value to the Tika metadata, logging rather than failing when it is not a String.
 */
private void addAsString(Metadata metadataToEmbed, String metadataKey, Object candidate)
{
    try
    {
        metadataToEmbed.add(metadataKey, (String) candidate);
    }
    catch (ClassCastException e)
    {
        logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
    }
}
/**
 * Collects every value stored under {@code key}, splitting each one on
 * {@code METADATA_SEPARATOR} and trimming the resulting parts. Duplicates are dropped while
 * the order of first appearance is kept. Parts that are empty after trimming (for example
 * from {@code "a, "} or {@code "a,,b"}) are skipped so that no empty value is reported.
 *
 * @param metadata the Tika metadata to read from
 * @param key the name of the metadata entry
 * @return {@code null} when there is no usable value, the value itself as a {@code String}
 *         when there is exactly one, otherwise a {@code String[]} of the distinct values
 */
private Serializable getMetadataValues(Metadata metadata, String key)
{
    // Use Set to prevent duplicates.
    Set<String> valuesSet = new LinkedHashSet<>();
    for (String rawValue : metadata.getValues(key))
    {
        if (rawValue == null)
        {
            continue;
        }
        for (String subPart : rawValue.split(METADATA_SEPARATOR))
        {
            String trimmed = subPart.trim();
            if (!trimmed.isEmpty())
            {
                valuesSet.add(trimmed);
            }
        }
    }
    String[] values = valuesSet.toArray(new String[0]);
    return values.length == 0 ? null : (values.length == 1 ? values[0] : values);
}
/**
 * Reads a metadata entry as a single string. A multi valued entry is reduced to its distinct,
 * non empty values joined with {@code ", "}.
 *
 * @param metadata the Tika metadata to read from
 * @param key the property to read
 * @return the value, the joined values, or whatever {@code metadata.get(key)} returns for a
 *         single valued entry
 */
private String getMetadataValue(Metadata metadata, Property key)
{
    if (!metadata.isMultiValued(key))
    {
        return metadata.get(key);
    }
    final String[] allValues = metadata.getValues(key);
    return distinct(allValues).collect(Collectors.joining(", "));
}
/**
 * Streams the supplied strings with nulls removed, surrounding white space stripped, empty
 * results removed and duplicates dropped, in that order.
 *
 * @param strings the values to clean up; may contain nulls
 * @return a stream of the distinct, stripped, non empty values in encounter order
 */
protected static Stream<String> distinct(final String[] strings)
{
    final Stream<String> present = Arrays.stream(strings).filter(value -> value != null);
    final Stream<String> stripped = present.map(value -> value.strip());
    return stripped
        .filter(value -> value.length() > 0)
        .distinct();
}
/**
* This content handler will capture entries from within
* the header of the Tika content XHTML, but ignore the
* rest. It does so by wrapping the supplied handler in a
* {@link MatchingContentHandler} restricted to the head.
*/
protected static class HeadContentHandler extends ContentHandlerDecorator
{
/**
* XHTML XPath parser, with the {@code xhtml} prefix bound to the XHTML namespace.
*/
private static final XPathParser PARSER =
new XPathParser("xhtml", XHTMLContentHandler.XHTML);
/**
* The XPath matcher used to select the nodes below the XHTML head element.
*/
private static final Matcher MATCHER =
PARSER.parse("/xhtml:html/xhtml:head/descendant:node()");
/**
* Creates a content handler that passes all XHTML head events to the
* given underlying content handler.
*
* @param handler content handler
*/
protected HeadContentHandler(ContentHandler handler)
{
super(new MatchingContentHandler(handler, MATCHER));
}
}
/**
 * Content handler that records, in {@link #tags}, the value of every attribute it sees (keyed
 * by the attribute's qualified name) and the non empty text of every element (keyed by the
 * element's qualified name). Later entries overwrite earlier ones with the same key.
 * Normally only used with {@link HeadContentHandler}
 */
protected static class MapCaptureContentHandler implements ContentHandler
{
    protected Map<String, String> tags = new HashMap<>();

    /** Text of the element currently open, or null when no element has been started. */
    private StringBuilder text;

    @Override
    public void characters(char[] ch, int start, int len)
    {
        if (text == null)
        {
            // Characters outside of any started element are ignored.
            return;
        }
        text.append(ch, start, len);
    }

    @Override
    public void endElement(String namespace, String localname, String qname)
    {
        final boolean hasText = text != null && text.length() > 0;
        if (hasText)
        {
            tags.put(qname, text.toString());
        }
        text = null;
    }

    @Override
    public void startElement(String namespace, String localname, String qname, Attributes attrs)
    {
        final int attributeCount = attrs.getLength();
        int index = 0;
        while (index < attributeCount)
        {
            tags.put(attrs.getQName(index), attrs.getValue(index));
            index++;
        }
        // Start collecting the text of this element afresh.
        text = new StringBuilder();
    }

    @Override
    public void endDocument() {}

    @Override
    public void endPrefixMapping(String paramString) {}

    @Override
    public void ignorableWhitespace(char[] paramArrayOfChar, int paramInt1, int paramInt2) {}

    @Override
    public void processingInstruction(String paramString1, String paramString2) {}

    @Override
    public void setDocumentLocator(Locator paramLocator) {}

    @Override
    public void skippedEntity(String paramString) {}

    @Override
    public void startDocument() {}

    @Override
    public void startPrefixMapping(String paramString1, String paramString2) {}
}
/**
 * A {@link ContentHandler} whose callbacks all do nothing.
 * Normally used when we only want the metadata, and don't
 * care about the file contents.
 */
protected static class NullContentHandler implements ContentHandler
{
    @Override
    public void startDocument() {}

    @Override
    public void endDocument() {}

    @Override
    public void startPrefixMapping(String paramString1, String paramString2) {}

    @Override
    public void endPrefixMapping(String paramString) {}

    @Override
    public void startElement(String paramString1, String paramString2,
        String paramString3, Attributes paramAttributes) {}

    @Override
    public void endElement(String paramString1, String paramString2, String paramString3) {}

    @Override
    public void characters(char[] paramArrayOfChar, int paramInt1, int paramInt2) {}

    @Override
    public void ignorableWhitespace(char[] paramArrayOfChar, int paramInt1, int paramInt2) {}

    @Override
    public void processingInstruction(String paramString1, String paramString2) {}

    @Override
    public void skippedEntity(String paramString) {}

    @Override
    public void setDocumentLocator(Locator paramLocator) {}
}
}

View File

@@ -0,0 +1,86 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.dwg.DWGParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
 * {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor.
 *
 * Configuration: (see DWGMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>title:</b>          --      cm:title
 *   <b>description:</b>    --      cm:description
 *   <b>author:</b>         --      cm:author
 *   <b>keyword:</b>
 *   <b>comments:</b>
 *   <b>lastAuthor:</b>
 * </pre>
 *
 * @author Nick Burch
 * @author adavis
 */
@Component
public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(DWGMetadataExtractor.class);

    private static final String KEY_KEYWORD = "keyword";
    private static final String KEY_LAST_AUTHOR = "lastAuthor";

    public DWGMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * Adds the DWG specific {@code keyword} and {@code lastAuthor} entries to the properties
     * that the common extraction has already gathered.
     *
     * @param metadata the Tika metadata produced by the parser
     * @param properties the raw properties gathered so far; updated in place
     * @param headers not read by this implementation
     * @return the supplied {@code properties}
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
    {
        putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties);
        // MODIFIER is the user who last modified the document; MODIFIED is the modification
        // date, which is not an author.
        putRawValue(KEY_LAST_AUTHOR, metadata.get(TikaCoreProperties.MODIFIER), properties);
        return properties;
    }

    /**
     * @return a new Tika {@link DWGParser} for each call
     */
    @Override
    protected Parser getParser()
    {
        return new DWGParser();
    }
}

View File

@@ -0,0 +1,162 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transform.tika.parsers.ExifToolParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
 * Metadata extractor that parses with the {@link ExifToolParser} and post-processes the
 * result: values containing the parser's separator become lists and the IPTC date entries
 * are rewritten into ISO 8601 form.
 */
@Component
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class);

    /** Property keys whose values are dates that need converting to ISO 8601. */
    private static final Set<String> IPTC_DATE_KEYS = Set.of("XMP-photoshop:DateCreated", "XMP-iptcExt:ArtworkDateCreated");

    /** Matches the date portion (year, month, day) of an IPTC date or date time string. */
    private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");

    /** The ISO 8601 delimiter between the date and the time. */
    private static final char ISO_TIME_SEPARATOR = 'T';

    private ExifToolParser parser;

    public IPTCMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * @return the {@link ExifToolParser}, created on first use and then reused
     */
    @Override
    protected Parser getParser()
    {
        if (this.parser == null) {
            this.parser = new ExifToolParser();
        }
        return this.parser;
    }

    /**
     * Because some of the mimetypes that IPTCMetadataExtractor now parse, were previously handled
     * by TikaAutoMetadataExtractor we call the TikaAutoMetadataExtractor.extractSpecific method to
     * ensure that the returned properties contains the expected entries.
     * <p>
     * Afterwards, when the parser has a separator, every String value that contains it is split
     * into a list, and the values of the IPTC date keys are converted to ISO 8601.
     *
     * @param metadata the Tika metadata produced by the parser
     * @param properties the raw properties gathered so far
     * @param headers passed on to TikaAutoMetadataExtractor
     * @return the properties returned by TikaAutoMetadataExtractor, with the changes above applied
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties,
        Map<String, String> headers)
    {
        properties = new TikaAutoMetadataExtractor().extractSpecific(metadata, properties, headers);
        ExifToolParser etParser = (ExifToolParser)this.getParser();
        if (etParser.getSeparator()!=null)
        {
            for (String key : properties.keySet())
            {
                if (properties.get(key) instanceof String)
                {
                    String value = (String) properties.get(key);
                    String separator = etParser.getSeparator();
                    if (value.contains(separator))
                    {
                        // Prefer the quoted form of the separator when the value uses it.
                        if (value.contains(String.format("\"%s\"",separator)))
                        {
                            separator = String.format("\"%s\"",separator);
                        }
                        String [] values = StringUtils.splitByWholeSeparator(value, separator);
                        // Change dateTime format. MM converted ':' to '-'
                        if (IPTC_DATE_KEYS.contains(key)){
                            values = iptcToIso8601DateStrings(values);
                        }
                        putRawValue(key, (Serializable) Arrays.asList(values), properties);
                    }
                    else if (IPTC_DATE_KEYS.contains(key)) {
                        // Handle property with a single date string
                        putRawValue(key, (Serializable) iptcToIso8601DateString(value), properties);
                    }
                }
            }
        }
        return properties;
    }

    /**
     * Converts a date or date time strings into Iso8601 format <p>
     * The supplied array is updated in place and returned.
     *
     * @param dateStrings the strings to convert
     * @return dateStrings in Iso8601 format
     * @see #iptcToIso8601DateString
     */
    protected String[] iptcToIso8601DateStrings(String[] dateStrings)
    {
        for (int i = 0; i < dateStrings.length; i++)
        {
            dateStrings[i] = iptcToIso8601DateString(dateStrings[i]);
        }
        return dateStrings;
    }

    /**
     * Converts a date or date time string into Iso8601 format <p>
     * Converts any ':' in the year portion of a date string characters to '-'. <p>
     * Expects the year in the format YYYY:MM:DD or YYYY-MM-DD <p>
     * Will add the correct delimiter, 'T', to any dateTime strings, where | can be any char other than ,'T':
     * YYYY:MM:DD|HH:mm:ss.... or YYYY-MM-DD|HH:mm:ss....
     * <p>
     * Only the single character directly after the date is replaced by 'T'; other occurrences
     * of that character later in the string (for example in the time or the zone offset) are
     * left untouched. A string without a recognisable date is returned unchanged.
     * <p>
     * Examples: <p><ul>
     *     <li>"1919:10:16" will convert to "1919-10-16"</li>
     *     <li>"1901:02:01 00:00:00.000Z" will convert to "1901-02-01T00:00:00.000Z"</li>
     *     <li>"2001:02:01 16:15+00:00" will convert to "2001-02-01T16:15+00:00"</li>
     *     <li>"2021-06-11 05:36-01:00" will convert to "2021-06-11T05:36-01:00"</li>
     * </ul>
     * @param dateStr the date or date time string to convert
     * @return dateStr in Iso8601 format
     */
    protected String iptcToIso8601DateString(String dateStr)
    {
        Matcher yearMatcher = YEAR_IPTC.matcher(dateStr);
        if (!yearMatcher.find())
        {
            return dateStr;
        }
        StringBuilder iso = new StringBuilder(dateStr);
        // Swap ':' for '-' inside the matched date only; the length does not change, so the
        // matcher's offsets stay valid for the builder.
        for (int i = yearMatcher.start(); i < yearMatcher.end(); i++)
        {
            if (iso.charAt(i) == ':')
            {
                iso.setCharAt(i, '-');
            }
        }
        // Replace just the delimiter that follows the date, not every occurrence of that char.
        int delimiterIndex = yearMatcher.end();
        if (delimiterIndex < iso.length() && iso.charAt(delimiterIndex) != ISO_TIME_SEPARATOR)
        {
            iso.setCharAt(delimiterIndex, ISO_TIME_SEPARATOR);
        }
        return iso.toString();
    }
}

View File

@@ -0,0 +1,114 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp3.Mp3Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
/**
 * MP3 file metadata extractor, built on the Tika {@link Mp3Parser}.
 *
 * Configuration: (see MP3MetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>songTitle:</b>              --      cm:title
 *   <b>albumTitle:</b>             --      audio:album
 *   <b>artist:</b>                 --      audio:artist, cm:author
 *   <b>description:</b>            --      cm:description
 *   <b>comment:</b>                --
 *   <b>yearReleased:</b>           --      audio:releaseDate
 *   <b>trackNumber:</b>            --      audio:trackNumber
 *   <b>genre:</b>                  --      audio:genre
 *   <b>composer:</b>               --      audio:composer
 *   <b>lyrics:</b>                 --
 * </pre>
 *
 * Note - XMPDM metadata keys are also emitted, in common with
 * the other Tika powered extracters
 *
 * Uses Apache Tika
 *
 * @author Nick Burch
 * @author adavis
 */
@Component
public class MP3MetadataExtractor extends TikaAudioMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(MP3MetadataExtractor.class);

    private static final String KEY_SONG_TITLE = "songTitle";
    private static final String KEY_ALBUM_TITLE = "albumTitle";
    private static final String KEY_ARTIST = "artist";
    private static final String KEY_COMMENT = "comment";
    private static final String KEY_YEAR_RELEASED = "yearReleased";
    private static final String KEY_TRACK_NUMBER = "trackNumber";
    private static final String KEY_GENRE = "genre";
    private static final String KEY_COMPOSER = "composer";

    public MP3MetadataExtractor()
    {
        super(logger);
    }

    /**
     * @return a new Tika {@link Mp3Parser} for each call
     */
    @Override
    protected Parser getParser()
    {
        return new Mp3Parser();
    }

    /**
     * Runs the audio mappings of the superclass and then adds the MP3 compatibility keys
     * (album title, song title, artist, comment, track number, genre, year released, composer).
     *
     * @param metadata the Tika metadata produced by the parser
     * @param properties the raw properties gathered so far; updated in place
     * @param headers passed on to the superclass
     * @return the supplied {@code properties}
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
    {
        // The normal Audio mappings come first.
        super.extractSpecific(metadata, properties, headers);

        // The compatibility values are only needed by people who had pre-existing mapping
        // properties from before the proper audio model was added.
        final String albumTitle = metadata.get(XMPDM.ALBUM);
        final String songTitle = metadata.get(TikaCoreProperties.TITLE);
        final String artist = metadata.get(XMPDM.ARTIST);
        final String comment = metadata.get(XMPDM.LOG_COMMENT);
        final String trackNumber = metadata.get(XMPDM.TRACK_NUMBER);
        final String genre = metadata.get(XMPDM.GENRE);
        final String yearReleased = metadata.get(XMPDM.RELEASE_DATE);
        final String composer = metadata.get(XMPDM.COMPOSER);

        putRawValue(KEY_ALBUM_TITLE, albumTitle, properties);
        putRawValue(KEY_SONG_TITLE, songTitle, properties);
        putRawValue(KEY_ARTIST, artist, properties);
        putRawValue(KEY_COMMENT, comment, properties);
        putRawValue(KEY_TRACK_NUMBER, trackNumber, properties);
        putRawValue(KEY_GENRE, genre, properties);
        putRawValue(KEY_YEAR_RELEASED, yearReleased, properties);
        putRawValue(KEY_COMPOSER, composer, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,113 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Message;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
 * Metadata extractor for emails stored in the Outlook MAPI format.
 *
 * Configuration: (see MailMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>sentDate:</b>     --  cm:sentdate
 *   <b>originator:</b>   --  cm:originator, cm:author
 *   <b>addressee:</b>    --  cm:addressee
 *   <b>addressees:</b>   --  cm:addressees
 *   <b>subjectLine:</b>  --  cm:subjectline, cm:description
 *   <b>toNames:</b>      --
 *   <b>ccNames:</b>      --
 *   <b>bccNames:</b>     --
 * </pre>
 *
 * TIKA note - to/cc/bcc go into the html part, not the metadata.
 * Also, email addresses not included as yet.
 *
 * @author Kevin Roast
 * @author adavis
 */
@Component
public class MailMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(MailMetadataExtractor.class);

    // Names of the raw properties this extractor emits
    private static final String KEY_SENT_DATE = "sentDate";
    private static final String KEY_ORIGINATOR = "originator";
    private static final String KEY_ADDRESSEE = "addressee";
    private static final String KEY_ADDRESSEES = "addressees";
    private static final String KEY_SUBJECT = "subjectLine";
    private static final String KEY_TO_NAMES = "toNames";
    private static final String KEY_CC_NAMES = "ccNames";
    private static final String KEY_BCC_NAMES = "bccNames";

    public MailMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * @return a new {@link OfficeParser}; the office parser does Outlook as well as Word, Excel etc
     */
    @Override
    protected Parser getParser()
    {
        return new OfficeParser();
    }

    /**
     * Copies the mail specific values out of the Tika metadata into the raw properties.
     *
     * @param metadata   the metadata Tika produced for the message
     * @param properties the raw properties collected so far; added to and returned
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        // Single valued fields
        final String originator = metadata.get(TikaCoreProperties.CREATOR);
        final String subjectLine = metadata.get(TikaCoreProperties.TITLE);
        final String description = metadata.get(TikaCoreProperties.SUBJECT);
        final String sentDate = metadata.get(TikaCoreProperties.MODIFIED);
        final String firstTo = metadata.get(Message.MESSAGE_TO);

        // Multi valued recipient fields
        final String[] toNames = metadata.getValues(Message.MESSAGE_TO);
        final String[] ccNames = metadata.getValues(Message.MESSAGE_CC);
        final String[] bccNames = metadata.getValues(Message.MESSAGE_BCC);
        final String[] recipientAddresses = metadata.getValues(Message.MESSAGE_RECIPIENT_ADDRESS);

        putRawValue(KEY_ORIGINATOR, originator, properties);
        putRawValue(KEY_SUBJECT, subjectLine, properties);
        putRawValue(KEY_DESCRIPTION, description, properties);
        putRawValue(KEY_SENT_DATE, sentDate, properties);

        // The addressee field only gets the TO value, never cc/bcc
        putRawValue(KEY_ADDRESSEE, firstTo, properties);

        // To, CC and BCC each get a field of their own
        putRawValue(KEY_TO_NAMES, toNames, properties);
        putRawValue(KEY_CC_NAMES, ccNames, properties);
        putRawValue(KEY_BCC_NAMES, bccNames, properties);

        // All recipient email addresses (to/cc/bcc) go into the addressees field
        putRawValue(KEY_ADDRESSEES, recipientAddresses, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,116 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
 * Metadata extractor for the (binary) Office file formats.
 *
 * Configuration: (see OfficeMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * This extractor uses the POI library to extract the following:
 * <pre>
 *   <b>author:</b>             --  cm:author
 *   <b>title:</b>              --  cm:title
 *   <b>subject:</b>            --  cm:description
 *   <b>createDateTime:</b>     --  cm:created
 *   <b>lastSaveDateTime:</b>   --  cm:modified
 *   <b>comments:</b>
 *   <b>editTime:</b>
 *   <b>format:</b>
 *   <b>keywords:</b>
 *   <b>lastAuthor:</b>
 *   <b>lastPrinted:</b>
 *   <b>osVersion:</b>
 *   <b>thumbnail:</b>
 *   <b>pageCount:</b>
 *   <b>wordCount:</b>
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Derek Hulley
 * @author Nick Burch
 * @author adavis
 */
@Component
public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(OfficeMetadataExtractor.class);

    // Names of the raw properties this extractor emits
    public static final String KEY_CREATE_DATETIME = "createDateTime";
    public static final String KEY_LAST_SAVE_DATETIME = "lastSaveDateTime";
    public static final String KEY_EDIT_TIME = "editTime";
    public static final String KEY_FORMAT = "format";
    public static final String KEY_KEYWORDS = "keywords";
    public static final String KEY_LAST_AUTHOR = "lastAuthor";
    public static final String KEY_LAST_PRINTED = "lastPrinted";
    public static final String KEY_PAGE_COUNT = "pageCount";
    public static final String KEY_PARAGRAPH_COUNT = "paragraphCount";
    public static final String KEY_WORD_COUNT = "wordCount";

    public OfficeMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * @return a new {@link OfficeParser} for each call
     */
    @Override
    protected Parser getParser()
    {
        return new OfficeParser();
    }

    /**
     * Copies the Office specific values out of the Tika metadata into the raw properties.
     *
     * @param metadata   the metadata Tika produced for the document
     * @param properties the raw properties collected so far; added to and returned
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        final String created = metadata.get(TikaCoreProperties.CREATED);
        final String modified = metadata.get(TikaCoreProperties.MODIFIED);
        final String format = metadata.get(TikaCoreProperties.FORMAT);
        final String keywords = metadata.get(TikaCoreProperties.SUBJECT);
        final String lastAuthor = metadata.get(TikaCoreProperties.MODIFIER);
        final String lastPrinted = metadata.get(TikaCoreProperties.PRINT_DATE);
        final String pageCount = metadata.get(Office.PAGE_COUNT);
        final String paragraphCount = metadata.get(Office.PARAGRAPH_COUNT);
        final String wordCount = metadata.get(Office.WORD_COUNT);

        putRawValue(KEY_CREATE_DATETIME, created, properties);
        putRawValue(KEY_LAST_SAVE_DATETIME, modified, properties);
        // NOTE(review): editTime is filled from the same MODIFIED value as lastSaveDateTime -
        // confirm that is intended rather than a dedicated edit time property.
        putRawValue(KEY_EDIT_TIME, modified, properties);
        putRawValue(KEY_FORMAT, format, properties);
        putRawValue(KEY_KEYWORDS, keywords, properties);
        putRawValue(KEY_LAST_AUTHOR, lastAuthor, properties);
        putRawValue(KEY_LAST_PRINTED, lastPrinted, properties);
        putRawValue(KEY_PAGE_COUNT, pageCount, properties);
        putRawValue(KEY_PARAGRAPH_COUNT, paragraphCount, properties);
        putRawValue(KEY_WORD_COUNT, wordCount, properties);

        return properties;
    }
}

View File

@@ -0,0 +1,174 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.odf.OpenDocumentMetaParser;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.parser.xml.ElementMetadataHandler;
import org.apache.tika.sax.TeeContentHandler;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.xml.sax.ContentHandler;
import java.io.Serializable;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
/**
 * {@code "application/vnd.oasis.opendocument..."} and {@code "applicationvnd.oasis.opendocument..."} metadata extractor.
 *
 * Configuration: (see OpenDocumentMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>creationDate:</b>    --  cm:created
 *   <b>creator:</b>         --  cm:author
 *   <b>date:</b>
 *   <b>description:</b>     --  cm:description
 *   <b>generator:</b>
 *   <b>initialCreator:</b>
 *   <b>keyword:</b>
 *   <b>language:</b>
 *   <b>printDate:</b>
 *   <b>printedBy:</b>
 *   <b>subject:</b>
 *   <b>title:</b>           --  cm:title
 *   <b>All user properties</b>
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Antti Jokipii
 * @author Derek Hulley
 * @author adavis
 */
@Component
public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(OpenDocumentMetadataExtractor.class);

    // Names of the raw properties this extractor emits
    private static final String KEY_CREATION_DATE = "creationDate";
    private static final String KEY_CREATOR = "creator";
    private static final String KEY_DATE = "date";
    private static final String KEY_GENERATOR = "generator";
    private static final String KEY_INITIAL_CREATOR = "initialCreator";
    private static final String KEY_KEYWORD = "keyword";
    private static final String KEY_LANGUAGE = "language";

    // Metadata name under which the extra handler added in getParser() stores the dc creator element
    private static final String KEY_ALFRESCO_CREATOR = "_alfresco:creator";

    // Prefix put in front of a mapping key when looking up a user defined property in the metadata
    private static final String CUSTOM_PREFIX = "custom:";

    private static final DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss");

    public OpenDocumentMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * Builds an {@link OpenDocumentParser} whose meta parser feeds the content both to the standard
     * handler and to an additional {@link ElementMetadataHandler} registered for the
     * {@code creator} element of the Dublin Core namespace with {@link #KEY_ALFRESCO_CREATOR} as
     * its metadata name.
     *
     * @return the customised parser; a new instance for each call
     */
    @Override
    protected Parser getParser()
    {
        final OpenDocumentMetaParser metaParser = new OpenDocumentMetaParser() {
            @Override
            protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context)
            {
                // Standard handler first, then the handler that records the creator separately
                return new TeeContentHandler(
                        super.getContentHandler(ch, md, context),
                        new ElementMetadataHandler(NAMESPACE_URI_DC, KEY_CREATOR, md, KEY_ALFRESCO_CREATOR));
            }
        };

        final OpenDocumentParser openDocumentParser = new OpenDocumentParser();
        openDocumentParser.setMetaParser(metaParser);
        return openDocumentParser;
    }

    /**
     * Copies the OpenDocument specific values out of the Tika metadata into the raw properties,
     * followed by any user defined ({@code custom:} prefixed) value whose name is a key of the
     * extract mapping.
     *
     * @param metadata   the metadata Tika produced for the document
     * @param properties the raw properties collected so far; added to and returned
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String, String> headers)
    {
        final Date creationDate = getDateOrNull(metadata.get(TikaCoreProperties.CREATED));
        putRawValue(KEY_CREATION_DATE, creationDate, properties);

        // The same creator value is published as both creator and author
        final String creator = getCreator(metadata);
        putRawValue(KEY_CREATOR, creator, properties);
        putRawValue(KEY_AUTHOR, creator, properties);

        final Date modifiedDate = getDateOrNull(metadata.get(TikaCoreProperties.MODIFIED));
        putRawValue(KEY_DATE, modifiedDate, properties);
        putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.DESCRIPTION), properties);
        putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
        putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
        putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties);
        putRawValue(KEY_LANGUAGE, metadata.get(TikaCoreProperties.LANGUAGE), properties);

        // Handle user-defined properties dynamically
        final Map<String, Set<String>> extractMapping = super.getExtractMapping();
        for (String key : extractMapping.keySet())
        {
            final String customValue = metadata.get(CUSTOM_PREFIX + key);
            if (customValue != null)
            {
                putRawValue(key, customValue, properties);
            }
        }
        return properties;
    }

    /**
     * Works out the creator to report.
     *
     * @param metadata the metadata Tika produced for the document
     * @return the creator when the metadata holds exactly one distinct value for it, otherwise
     *         whatever is stored under {@link #KEY_ALFRESCO_CREATOR} (may be {@code null})
     */
    private String getCreator(Metadata metadata)
    {
        final List<String> distinctCreators = distinct(metadata.getValues(TikaCoreProperties.CREATOR))
                .collect(Collectors.toUnmodifiableList());

        return distinctCreators.size() == 1
                ? distinctCreators.get(0)
                : metadata.get(KEY_ALFRESCO_CREATOR);
    }

    /**
     * Parses a date in the {@code yyyy-MM-dd'T'HH:mm:ss} format.
     *
     * @param dateString the text to parse, may be {@code null} or empty
     * @return the parsed date, or {@code null} when there is no text or it cannot be parsed
     */
    private Date getDateOrNull(String dateString)
    {
        if (dateString == null || dateString.isEmpty())
        {
            return null;
        }

        try
        {
            return dateFormatter.parseDateTime(dateString).toDate();
        }
        catch (IllegalArgumentException ignore)
        {
            // Deliberately ignored: text that is not in the expected format is reported as "no date"
            return null;
        }
    }
}

View File

@@ -0,0 +1,79 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transform.tika.transformers.Tika;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
 * Metadata extractor for PDF documents.
 *
 * Configuration: (see PdfBoxMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>    --  cm:author
 *   <b>title:</b>     --  cm:title
 *   <b>subject:</b>   --  cm:description
 *   <b>created:</b>   --  cm:created
 * </pre>
 *
 * Uses Apache Tika
 *
 * @author Jesper Steen Møller
 * @author Derek Hulley
 * @author adavis
 */
@Component
public class PdfBoxMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(PdfBoxMetadataExtractor.class);

    public PdfBoxMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * @return a new {@link PDFParser} for each call
     */
    @Override
    protected Parser getParser()
    {
        return new PDFParser();
    }

    /**
     * Supplies the selector for embedded documents. Neither argument is consulted; the selector
     * defined on {@link Tika} for PDFs is always returned.
     *
     * @param metadata       not used
     * @param targetMimeType not used
     * @return {@code Tika.pdfBoxEmbededDocumentSelector}
     */
    @Override
    protected DocumentSelector getDocumentSelector(Metadata metadata, String targetMimeType)
    {
        return Tika.pdfBoxEmbededDocumentSelector;
    }
}

View File

@@ -0,0 +1,179 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Set;
import java.util.StringJoiner;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
/**
 * POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI.
 *
 * Configuration: (see PoiMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>               --  cm:author
 *   <b>title:</b>                --  cm:title
 *   <b>subject:</b>              --  cm:description
 *   <b>created:</b>              --  cm:created
 *   <b>Any custom property:</b>  --  [not mapped]
 * </pre>
 *
 * Uses Apache Tika
 *
 * Also includes a sample POI metadata embedder to demonstrate it is possible to add custom T-Engines that will add
 * metadata. This is not production code so no supported mimetypes exist in the {@code tika_engine_config.json}.
 * Adding the following would make it available:
 *
 * <pre>
 * {
 *   "transformOptions": {
 *     ...
 *     "metadataEmbedOptions": [
 *       {"value": {"name": "metadata", "required": true}}
 *     ]
 *   },
 *   "transformers": [
 *     ...
 *     {
 *       "transformerName": "SamplePoiMetadataEmbedder",
 *       "supportedSourceAndTargetList": [
 *         ...
 *         {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "alfresco-metadata-embed"}
 *       ],
 *       "transformOptions": [
 *         "metadataEmbedOptions"
 *       ]
 *     }
 *   ]
 * }
 * </pre>
 * @author Nick Burch
 * @author Neil McErlean
 * @author Dmitry Velichkevich
 * @author adavis
 */
@Component
public class PoiMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(PoiMetadataExtractor.class);

    public PoiMetadataExtractor()
    {
        super(EXTRACTOR, logger);
    }

    /**
     * @return a new {@link OOXMLParser} for each call
     */
    @Override
    protected Parser getParser()
    {
        return new OOXMLParser();
    }

    /**
     * @return a new instance of the sample embedder for each call
     */
    @Override
    protected Embedder getEmbedder()
    {
        return new SamplePoiEmbedder();
    }

    /**
     * Sample (non production) embedder that writes metadata into the properties of a spreadsheet.
     */
    private static class SamplePoiEmbedder implements Embedder
    {
        private static final Set<MediaType> SUPPORTED_EMBED_TYPES =
                Collections.singleton(MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"));

        /**
         * @param parseContext not used
         * @return the single supported media type (spreadsheetml.sheet)
         */
        @Override
        public Set<MediaType> getSupportedEmbedTypes(ParseContext parseContext)
        {
            return SUPPORTED_EMBED_TYPES;
        }

        /**
         * Reads a workbook from {@code inputStream}, copies every entry of {@code metadata} into the
         * workbook's properties and writes the result to {@code outputStream}.
         * <p>
         * The names {@code author}, {@code title} and {@code description} are stored as the core
         * creator, title and description. Every other name is added as a custom property. The values
         * of a multi valued entry are joined with {@code ", "}.
         *
         * @param metadata     the values to embed
         * @param inputStream  the source workbook
         * @param outputStream where the updated workbook is written
         * @param parseContext not used
         * @throws IOException if the workbook cannot be read or written
         */
        @Override
        public void embed(Metadata metadata, InputStream inputStream, OutputStream outputStream, ParseContext parseContext)
                throws IOException
        {
            // The workbook is Closeable: close it once written (or on failure) so that the
            // resources it holds are released rather than left for the garbage collector.
            try (XSSFWorkbook workbook = new XSSFWorkbook(inputStream))
            {
                POIXMLProperties props = workbook.getProperties();
                POIXMLProperties.CoreProperties coreProp = props.getCoreProperties();
                POIXMLProperties.CustomProperties custProp = props.getCustomProperties();

                for (String name : metadata.names())
                {
                    String value = metadata.isMultiValued(name)
                            ? String.join(", ", metadata.getValues(name))
                            : metadata.get(name);

                    switch (name)
                    {
                        case "author":
                            coreProp.setCreator(value);
                            break;
                        case "title":
                            coreProp.setTitle(value);
                            break;
                        case "description":
                            coreProp.setDescription(value);
                            break;
                        // There are other core values but this is sample code, so we will assume it is a custom value.
                        default:
                            custProp.addProperty(name, value);
                            break;
                    }
                }

                workbook.write(outputStream);
            }
        }
    }
}

View File

@@ -0,0 +1,178 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp4.MP4Parser;
import org.gagravarr.tika.FlacParser;
import org.gagravarr.tika.VorbisParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
/**
 * A Metadata Extractor which makes use of the Apache Tika Audio Parsers to extract metadata from media files.
 * For backwards compatibility reasons, this doesn't handle the MP3 format, which has its own dedicated extractor
 * in {@link MP3MetadataExtractor}
 *
 * Configuration: (see TikaAudioMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>             --  cm:author
 *   <b>title:</b>              --  cm:title
 *   <b>created:</b>            --  cm:created
 *   <b>xmpDM:artist</b>        --  audio:artist
 *   <b>xmpDM:composer</b>      --  audio:composer
 *   <b>xmpDM:engineer</b>      --  audio:engineer
 *   <b>xmpDM:genre</b>         --  audio:genre
 *   <b>xmpDM:trackNumber</b>   --  audio:trackNumber
 *   <b>xmpDM:releaseDate</b>   --  audio:releaseDate
 * </pre>
 *
 * @author Nick Burch
 * @author adavis
 */
@Component
public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(TikaAudioMetadataExtractor.class);

    // The audio parsers that getParser() combines into one composite parser
    private static final Parser[] parsers = new Parser[] {
        new VorbisParser(),
        new FlacParser(),
        new MP4Parser()
    };

    protected final TikaConfig tikaConfig;

    public TikaAudioMetadataExtractor()
    {
        this(logger);
    }

    /**
     * @param logger handed to the superclass and used while reading the Tika configuration
     */
    public TikaAudioMetadataExtractor(Logger logger)
    {
        super(EXTRACTOR, logger);
        tikaConfig = readTikaConfig(logger);
    }

    /**
     * @return a new {@link CompositeParser} over the audio parsers, using the media type registry
     *         of the Tika configuration
     */
    @Override
    protected Parser getParser()
    {
        return new CompositeParser(tikaConfig.getMediaTypeRegistry(), parsers);
    }

    /**
     * Adds the values that need special treatment: the generated description and the release date,
     * the latter stored both as the created value and under the xmpDM release date name.
     * Most things can go with the default Tika -> Alfresco Mapping.
     *
     * @param metadata   the metadata Tika produced for the media file
     * @param properties the raw properties collected so far; added to and returned
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        // The description is built from several values
        final String description = generateDescription(metadata);
        putRawValue(KEY_DESCRIPTION, description, properties);

        // The release date can be fiddly, and is published under two names
        final Serializable releaseDate = generateReleaseDate(metadata);
        putRawValue(KEY_CREATED, releaseDate, properties);
        putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties);

        return properties;
    }

    /**
     * Generates the release date.
     *
     * @param metadata the metadata extracted from the file
     * @return {@code null} when there is no release date; midnight on the 1st of January (in the
     *         default calendar) when the value is just a four digit year; otherwise the result of
     *         {@code makeDate} for the value
     */
    private Serializable generateReleaseDate(Metadata metadata)
    {
        final String releaseDate = metadata.get(XMPDM.RELEASE_DATE);
        if (releaseDate == null || releaseDate.isEmpty())
        {
            return null;
        }

        if (!releaseDate.matches("\\d\\d\\d\\d"))
        {
            // More than just a year, so treat as a normal date
            return makeDate(releaseDate);
        }

        // Just a year but a full date is needed, so go for the 1st of the 1st
        final int year = Integer.parseInt(releaseDate);
        final Calendar firstOfJanuary = Calendar.getInstance();
        firstOfJanuary.set(year, Calendar.JANUARY, 1, 0, 0, 0);
        firstOfJanuary.set(Calendar.MILLISECOND, 0);
        return firstOfJanuary.getTime();
    }

    /**
     * Generates the description in the form {@code title - album (artist)}, where the album and
     * artist parts are only present when the metadata has them.
     *
     * @param metadata the metadata extracted from the file
     * @return the description, which is empty when the metadata has no title
     */
    private String generateDescription(Metadata metadata)
    {
        final StringBuilder description = new StringBuilder();

        final String title = metadata.get(TikaCoreProperties.TITLE);
        if (title != null)
        {
            description.append(title);

            final String album = metadata.get(XMPDM.ALBUM);
            if (album != null)
            {
                description.append(" - ").append(album);
            }

            final String artist = metadata.get(XMPDM.ARTIST);
            if (artist != null)
            {
                description.append(" (").append(artist).append(")");
            }
        }

        return description.toString();
    }
}

View File

@@ -0,0 +1,147 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
/**
 * A Metadata Extractor which makes use of the Apache Tika auto-detection to select the best parser to extract the
 * metadata from a document. This will be used for all files which Tika can handle, but where no other more explicit
 * extractor is defined.
 *
 * Configuration: (see TikaAutoMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
 *
 * <pre>
 *   <b>author:</b>     --  cm:author
 *   <b>title:</b>      --  cm:title
 *   <b>subject:</b>    --  cm:description
 *   <b>created:</b>    --  cm:created
 *   <b>comments:</b>
 *   <b>geo:lat:</b>    --  cm:latitude
 *   <b>geo:long:</b>   --  cm:longitude
 * </pre>
 *
 * @author Nick Burch
 * @author adavis
 */
@Component
public class TikaAutoMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(TikaAutoMetadataExtractor.class);

    // Metadata names read (EXIF, Compression) or written (JPEG) when fixing up JPEG image sizes
    private static final String EXIF_IMAGE_HEIGHT_TAG = "Exif Image Height";
    private static final String EXIF_IMAGE_WIDTH_TAG = "Exif Image Width";
    private static final String JPEG_IMAGE_HEIGHT_TAG = "Image Height";
    private static final String JPEG_IMAGE_WIDTH_TAG = "Image Width";
    private static final String COMPRESSION_TAG = "Compression";

    protected final TikaConfig tikaConfig;

    public TikaAutoMetadataExtractor()
    {
        super(EXTRACTOR, logger);
        tikaConfig = readTikaConfig(logger);
    }

    /**
     * Does auto-detection to select the best Tika Parser.
     *
     * @return a new {@link AutoDetectParser} built from the Tika configuration
     */
    @Override
    protected Parser getParser()
    {
        return new AutoDetectParser(tikaConfig);
    }

    /**
     * Overrides the image size values of JPEG files that carry exif information.
     * <p>
     * Rationale, as recorded by the original authors: because some editors use
     * JPEG_IMAGE_HEIGHT_TAG when saving JPEG images, a more reliable source for image size are the
     * values provided by Tika and not the exif/tiff metadata read from the file. This overrides the
     * tiff:Image size which gets embedded into the alfresco node properties.
     * <p>
     * Nothing is changed unless the content type is JPEG and the metadata holds the exif width,
     * the exif height and a compression value. In that case the tiff length and width are set to
     * the leading digits of the exif height and width, and the "Image Height"/"Image Width"
     * values are set to the unmodified exif height and width.
     *
     * @param metadata   the metadata Tika produced for the document
     * @param properties the raw properties collected so far; added to and returned
     * @param headers    not used by this extractor
     * @return the {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String,String> headers)
    {
        if (!MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE)))
        {
            return properties;
        }

        // Only images that have the exif information are touched
        final String exifWidth = metadata.get(EXIF_IMAGE_WIDTH_TAG);
        final String exifHeight = metadata.get(EXIF_IMAGE_HEIGHT_TAG);
        if (exifWidth == null || exifHeight == null || metadata.get(COMPRESSION_TAG) == null)
        {
            return properties;
        }

        // Replace the size properties that will be embedded in the node
        putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(exifHeight), properties);
        putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(exifWidth), properties);
        putRawValue(JPEG_IMAGE_HEIGHT_TAG, exifHeight, properties);
        putRawValue(JPEG_IMAGE_WIDTH_TAG, exifWidth, properties);

        return properties;
    }

    /**
     * Exif metadata for size also returns the string "pixels" after the number value, so only the
     * digits before the first non digit character are kept.
     *
     * @param sizeText string text, must not be {@code null}
     * @return the leading run of digits, which is empty if the text does not start with a digit
     */
    private String extractSize(String sizeText)
    {
        int digitCount = 0;
        while (digitCount < sizeText.length() && Character.isDigit(sizeText.charAt(digitCount)))
        {
            digitCount++;
        }
        return sizeText.substring(0, digitCount);
    }
}

View File

@@ -0,0 +1,372 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.parsers;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_TIFF;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.external.ExternalParsersFactory;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.image.JpegParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * {@link ExternalParser} that runs the ExifTool command described by the
 * {@code parsers/external/config/exiftool-parser.xml} classpath resource.
 * <p>
 * The command, ignored-line consumer, metadata extraction patterns and supported types are copied from the
 * first parser that {@link ExternalParsersFactory} creates from that resource. If no parser is created an
 * error is logged and nothing is copied.
 */
public class ExifToolParser extends ExternalParser {

    private static final Logger logger = LoggerFactory.getLogger(ExifToolParser.class);

    private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml";

    protected static final String DEFAULT_SEPARATOR = ", ";
    protected static final String SEPARATOR_SETTING = "-sep";

    private String separator;

    /**
     * Creates the parser and configures it from the ExifTool external parser config resource.
     * Configuration failures are logged rather than thrown.
     */
    public ExifToolParser() {
        super();
        URL configUrl = getExternalParserConfigURL();
        if (configUrl == null) {
            // Without this guard a missing resource would surface as an unexplained failure inside the factory.
            logger.error("Error creating ExifToolParser from config, ExifToolExtractions not enabled: {} not found.",
                    EXIFTOOL_PARSER_CONFIG);
            return;
        }
        try {
            List<ExternalParser> eParsers = ExternalParsersFactory.create(configUrl);
            // if ExifTool is not installed then no parsers are returned
            if (!eParsers.isEmpty()) {
                ExternalParser eParser = eParsers.get(0);
                this.setCommand(eParser.getCommand());
                this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
                this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
                this.setSupportedTypes(eParser.getSupportedTypes());
            } else {
                logger.error(
                        "Error creating ExifToolParser from config, ExifToolExtractions not enabled. Please check ExifTool is installed correctly.");
            }
        } catch (IOException | TikaException e) {
            logger.error("Error creating ExifToolParser from config, ExifToolExtractions not enabled: ", e);
        }
    }

    /**
     * @return the URL of the ExifTool parser config resource, or {@code null} if the class loader cannot find it
     */
    private URL getExternalParserConfigURL() {
        ClassLoader classLoader = ExifToolParser.class.getClassLoader();
        return classLoader.getResource(EXIFTOOL_PARSER_CONFIG);
    }

    public void setSeparator(String sep) {
        this.separator = sep;
    }

    public String getSeparator() {
        return this.separator;
    }

    /**
     * Sets the command and derives the separator from it: a single-string command is searched for the
     * {@code -sep} setting, a multi-element command gets {@link #DEFAULT_SEPARATOR}.
     */
    @Override
    public void setCommand(String... command) {
        super.setCommand(command);
        if (command.length == 1) {
            setSeparator(findSeparator(command[0]));
        } else {
            setSeparator(DEFAULT_SEPARATOR);
        }
    }

    /**
     * Finds the value that follows the {@code -sep} setting in a command line.
     * <p>
     * The value starts one character after the setting. A value that starts with a double quote runs to the
     * next double quote, otherwise it runs to the next space or to the end of the command.
     *
     * @param command the complete command line
     * @return the separator, or {@link #DEFAULT_SEPARATOR} if the setting is absent, has no value, or has an
     *         unterminated quoted value
     */
    protected String findSeparator(String command) {
        int settingIndex = command.indexOf(SEPARATOR_SETTING);
        if (settingIndex == -1) {
            return DEFAULT_SEPARATOR;
        }
        int start = settingIndex + SEPARATOR_SETTING.length() + 1;
        if (start >= command.length()) {
            // "-sep" is the last token, so there is no value to read
            return DEFAULT_SEPARATOR;
        }
        if (command.charAt(start) == '"') {
            // get all chars up to the next "
            int end = command.indexOf('"', start + 1);
            return end == -1 ? DEFAULT_SEPARATOR : command.substring(start + 1, end);
        }
        int end = command.indexOf(' ', start);
        // the value may be the last token, in which case there is no trailing space
        return end == -1 ? command.substring(start) : command.substring(start, end);
    }

    /**
     * Adapted from {@link org.apache.tika.parser.external.ExternalParser}
     * due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation. <p>
     * Executes the configured external command and passes the given document
     * stream as a simple XHTML document to the given SAX content handler.
     * Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
     * has been called to set patterns.
     * <p>
     * After the external command has run (when the media type is supported), the stream is also offered to
     * a {@link JpegParser}, {@link TiffParser} or {@link ImageParser}, chosen by media type.
     *
     * @throws TikaException if the metadata holds no parsable content type, or the command cannot be started
     */
    @Override
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        MediaType mediaType = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
        if (mediaType == null) {
            // Fail with a meaningful message instead of a NullPointerException further down
            throw new TikaException("Unable to parse without a valid content type: "
                    + metadata.get(Metadata.CONTENT_TYPE));
        }
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);
            if (this.getSupportedTypes().contains(mediaType)) {
                parse(tis, xhtml, metadata, tmp);
            }
            switch (mediaType.getType() + "/" + mediaType.getSubtype()) {
                case MIMETYPE_IMAGE_JPEG:
                    parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
                    break;
                case MIMETYPE_IMAGE_TIFF:
                    parseAdditional(new TiffParser(), tis, handler, metadata, context, mediaType);
                    break;
                default:
                    parseAdditional(new ImageParser(), tis, handler, metadata, context, mediaType);
            }
        } finally {
            tmp.dispose();
        }
    }

    /**
     * Runs the given parser over the stream, but only if it supports the media type.
     */
    private void parseAdditional(Parser parser, TikaInputStream tis, ContentHandler handler, Metadata metadata,
            ParseContext context, MediaType mediaType) throws IOException, SAXException, TikaException {
        if (parser.getSupportedTypes(context).contains(mediaType)) {
            parser.parse(tis, handler, metadata, context);
        }
    }

    /**
     * Builds and runs the external command, then collects its output.
     * <p>
     * Input and output file tokens in the command are replaced with the path of the stream's file and of a
     * new temporary file respectively. Without an input token the stream is sent to the command's standard
     * input; without an output token the command's standard output is written to the XHTML handler.
     *
     * @throws TikaException if the external command cannot be started
     */
    private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata metadata, TemporaryResources tmp)
            throws IOException, SAXException, TikaException {
        boolean inputToStdIn = true;
        boolean outputFromStdOut = true;
        boolean hasPatterns = (getMetadataExtractionPatterns() != null && !getMetadataExtractionPatterns().isEmpty());
        File output = null;

        // Build the command; a single configured string is split on spaces
        String[] cmd;
        if (getCommand().length == 1) {
            cmd = getCommand()[0].split(" ");
        } else {
            cmd = new String[getCommand().length];
            System.arraycopy(getCommand(), 0, cmd, 0, getCommand().length);
        }
        for (int i = 0; i < cmd.length; i++) {
            if (cmd[i].contains(INPUT_FILE_TOKEN)) {
                cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
                inputToStdIn = false;
            }
            if (cmd[i].contains(OUTPUT_FILE_TOKEN)) {
                output = tmp.createTemporaryFile();
                outputFromStdOut = false;
                cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
            }
        }

        // Execute
        Process process;
        try {
            if (cmd.length == 1) {
                process = Runtime.getRuntime().exec(cmd[0]);
            } else {
                process = Runtime.getRuntime().exec(cmd);
            }
        } catch (Exception e) {
            // Previously the failure was printed and a null process was dereferenced below
            throw new TikaException("Unable to start external command: " + String.join(" ", cmd), e);
        }

        try {
            if (inputToStdIn) {
                sendInput(process, stream);
            } else {
                process.getOutputStream().close();
            }
            InputStream out = process.getInputStream();
            InputStream err = process.getErrorStream();
            if (hasPatterns) {
                if (outputFromStdOut) {
                    extractOutput(out, xhtml);
                } else {
                    extractMetadata(out, metadata);
                }
            } else {
                ignoreStream(err);
                if (outputFromStdOut) {
                    extractOutput(out, xhtml);
                } else {
                    ignoreStream(out);
                }
            }
        } finally {
            try {
                process.waitFor();
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of silently dropping it
                Thread.currentThread().interrupt();
            }
        }

        // Grab the output if we haven't already
        if (!outputFromStdOut) {
            try (InputStream fileOutput = new FileInputStream(output)) {
                extractOutput(fileOutput, xhtml);
            }
        }
    }

    /**
     * Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
     * Writes the contents of the given stream, decoded as UTF-8, to the given XHTML
     * content handler as a document holding a single {@code p} element.
     * The stream is closed once fully processed.
     *
     * @param stream stream
     * @param xhtml XHTML content handler
     * @throws SAXException if the XHTML SAX events could not be handled
     * @throws IOException if an input error occurred
     */
    private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
        try (Reader reader = new InputStreamReader(stream, UTF_8)) {
            xhtml.startDocument();
            xhtml.startElement("p");
            char[] buffer = new char[1024];
            for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
                xhtml.characters(buffer, 0, n);
            }
            xhtml.endElement("p");
            xhtml.endDocument();
        }
    }

    /**
     * Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
     * Starts a thread that sends the contents of the given input stream
     * to the standard input stream of the given process, and waits for it to finish.
     * Potential exceptions are ignored, and the standard input stream is closed
     * once fully processed. Note that the given input stream is <em>not</em>
     * closed by this method.
     *
     * @param process process
     * @param stream input stream
     */
    private void sendInput(final Process process, final InputStream stream) {
        Thread t = new Thread(() -> {
            // Closing stdin is what signals end-of-input to the external command
            try (OutputStream stdin = process.getOutputStream()) {
                IOUtils.copy(stream, stdin);
            } catch (IOException ignored) {
                // best effort: the command may exit without reading all of its input
            }
        });
        t.start();
        joinQuietly(t);
    }

    /**
     * Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
     * Starts a thread that reads and discards the contents of the
     * given stream, and waits for it to finish. Potential exceptions
     * are ignored, and the stream is closed once fully processed.
     *
     * @param stream stream
     */
    private void ignoreStream(final InputStream stream) {
        Thread t = new Thread(() -> {
            try {
                IOUtils.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM);
            } catch (IOException ignored) {
                // the content is being discarded anyway
            } finally {
                IOUtils.closeQuietly(stream);
            }
        });
        t.start();
        joinQuietly(t);
    }

    /**
     * Starts a thread that reads the given stream line by line, decoded as UTF-8, and waits for it to finish.
     * For every configured extraction pattern found in a line a metadata value is added: when the pattern
     * has a non-empty name, group 1 is added under that name, otherwise group 1 is the name and group 2 the
     * value. Read errors are ignored, and the stream is closed once fully processed.
     *
     * @param stream stream
     * @param metadata metadata to add the extracted values to
     */
    private void extractMetadata(final InputStream stream, final Metadata metadata) {
        Thread t = new Thread(() -> {
            BufferedReader reader = new BufferedReader(new InputStreamReader(stream, UTF_8));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    for (Map.Entry<Pattern, String> pattern : getMetadataExtractionPatterns().entrySet()) {
                        Matcher m = pattern.getKey().matcher(line);
                        if (m.find()) {
                            String name = pattern.getValue();
                            if (name != null && !name.equals("")) {
                                metadata.add(name, m.group(1));
                            } else {
                                metadata.add(m.group(1), m.group(2));
                            }
                        }
                    }
                }
            } catch (IOException ignored) {
                // Ignore
            } finally {
                IOUtils.closeQuietly(reader);
                IOUtils.closeQuietly(stream);
            }
        });
        t.start();
        joinQuietly(t);
    }

    /**
     * Waits for the given thread to finish, restoring the interrupt flag if the wait is interrupted.
     */
    private static void joinQuietly(Thread thread) {
        try {
            thread.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

View File

@@ -0,0 +1,120 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.parsers;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
///////// THIS FILE WAS A COPY OF THE CODE IN alfresco-repository /////////////
/**
* <a href="http://tika.apache.org/Apache Tika">Apache Tika</a> assumes that
* you either know exactly what your content is, or that
* you'll leave it to auto-detection.
* Within Alfresco, we usually do know. However, from time
* to time, we don't know if we have one of the old or one
* of the new office files (eg .xls and .xlsx).
* This class allows automatically selects the appropriate
* old (OLE2) or new (OOXML) Tika parser as required.
*
* @author Nick Burch
*/
public class TikaOfficeDetectParser implements Parser
{
private final Parser ole2Parser = new OfficeParser();
private final Parser ooxmlParser = new OOXMLParser();
public Set<MediaType> getSupportedTypes(ParseContext parseContext)
{
Set<MediaType> types = new HashSet<>();
types.addAll(ole2Parser.getSupportedTypes(parseContext));
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
return types;
}
public void parse(InputStream stream,
ContentHandler handler, Metadata metadata,
ParseContext parseContext) throws IOException, SAXException,
TikaException
{
byte[] initial4 = new byte[4];
InputStream wrapped;
// Preserve TikaInputStreams as TikaInputStreams as they require less memory to process
if (stream.markSupported())
{
stream.mark(initial4.length);
IOUtils.readFully(stream, initial4);
stream.reset();
wrapped = stream;
}
else
{
PushbackInputStream inp = new PushbackInputStream(stream, 4);
IOUtils.readFully(inp, initial4);
inp.unread(initial4);
wrapped = inp;
}
// Which is it?
if (initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
{
ooxmlParser.parse(wrapped, handler, metadata, parseContext);
}
else
{
ole2Parser.parse(wrapped, handler, metadata, parseContext);
}
}
/**
* @deprecated This method will be removed in Apache Tika 1.0.
*/
public void parse(InputStream stream,
ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException
{
parse(stream, handler, metadata, new ParseContext());
}
}

View File

@@ -0,0 +1,42 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
/**
 * Transformer that parses its source with the shared package parser held by {@link Tika}.
 */
@Component
public class ArchiveTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#packageParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // packageParser is a static member, so it is referenced through the class
        return Tika.packageParser;
    }
}

View File

@@ -0,0 +1,146 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.logging.LogEntry;
import org.alfresco.transform.common.RequestParamMap;
import org.alfresco.transform.common.TransformException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Map;
import java.util.StringJoiner;
import static java.lang.Boolean.parseBoolean;
/**
 * Base class of the Tika transformers. Subclasses supply the {@link Parser} (and optionally a
 * {@link DocumentSelector}); this class turns the transform options into the argument list
 * that is handed to {@link Tika#transform}.
 */
public abstract class GenericTikaTransformer implements CustomTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(GenericTikaTransformer.class);

    @Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
    boolean notExtractBookmarksTextDefault;

    @Autowired
    protected Tika tika;

    /**
     * @return the parser this transformer passes to {@link Tika#transform}
     */
    protected abstract Parser getParser();

    /**
     * @return the document selector passed to {@link Tika#transform}; {@code null} unless overridden
     */
    protected DocumentSelector getDocumentSelector()
    {
        return null;
    }

    /**
     * @return the simple class name with its last {@code "Transformer".length()} characters removed
     */
    @Override
    public String getTransformerName()
    {
        final String className = getClass().getSimpleName();
        final int suffixLength = "Transformer".length();
        return className.substring(0, className.length() - suffixLength);
    }

    /**
     * Stream based transform. Not implemented yet: always throws a {@link TransformException}.
     */
    @Override
    public void transform(String sourceMimetype, String sourceEncoding, InputStream inputStream,
            String targetMimetype, String targetEncoding, OutputStream outputStream,
            Map<String, String> transformOptions) throws Exception
    {
        // TODO
        throw new TransformException(500, "TODO GenericTikaTransformer transform with InputStreams");
    }

    /**
     * File based transform. Reads the include-contents, bookmarks and target encoding options
     * (the encoding falls back to UTF-8) and calls Tika with the matching arguments.
     */
    public void transform(String transformName, String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
            throws Exception
    {
        final String includeContentsOption =
                transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false");
        final String bookmarksOption = transformOptions.getOrDefault(
                RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT, String.valueOf(notExtractBookmarksTextDefault));
        final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");

        final boolean bookmarksOptionMissing =
                transformOptions.get(RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT) == null;
        if (bookmarksOptionMissing && notExtractBookmarksTextDefault)
        {
            logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault);
        }

        // Flag arguments are only passed when switched on; null entries are dropped later
        String includeContentsArg = null;
        if (parseBoolean(includeContentsOption))
        {
            includeContentsArg = Tika.INCLUDE_CONTENTS;
        }
        String bookmarksArg = null;
        if (parseBoolean(bookmarksOption))
        {
            bookmarksArg = Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
        }

        call(sourceFile, targetFile, transformName, includeContentsArg, bookmarksArg,
                Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding);
    }

    /**
     * Invokes Tika with this transformer's parser and selector and the assembled arguments.
     */
    void call(File sourceFile, File targetFile, String... args)
    {
        tika.transform(getParser(), getDocumentSelector(), buildArgs(sourceFile, targetFile, args));
    }

    /**
     * Drops null arguments and appends the absolute paths of the source and target files.
     * A shortened form of the arguments, with each path replaced by its file extension,
     * is recorded through {@link LogEntry#setOptions}.
     */
    private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
    {
        ArrayList<String> collected = new ArrayList<>(args.length + 2);
        StringJoiner summary = new StringJoiner(" ");

        for (String arg : args)
        {
            if (arg == null)
            {
                continue;
            }
            summary.add(arg);
            collected.add(arg);
        }

        for (File file : new File[] {sourceFile, targetFile})
        {
            if (file == null)
            {
                continue;
            }
            String absolutePath = file.getAbsolutePath();
            int dot = absolutePath.lastIndexOf('.');
            // only the extension is logged; "???" marks a path without one
            summary.add(dot == -1 ? "???" : absolutePath.substring(dot + 1));
            collected.add(absolutePath);
        }

        LogEntry.setOptions(summary.toString());
        return collected.toArray(new String[0]);
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Transformer that parses its source with the shared OOXML parser held by {@link Tika}.
 */
@Component
public class OOXMLTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#ooXmlParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // ooXmlParser is a static member, so it is referenced through the class
        return Tika.ooXmlParser;
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Transformer that parses its source with the shared office parser held by {@link Tika}.
 */
@Component
public class OfficeTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#officeParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // officeParser is a static member, so it is referenced through the class
        return Tika.officeParser;
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Transformer registered under the OutlookMsg name; it parses its source with the
 * shared office parser held by {@link Tika}.
 */
@Component
public class OutlookMsgTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#officeParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // officeParser is a static member, so it is referenced through the class
        return Tika.officeParser;
    }
}

View File

@@ -0,0 +1,47 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Transformer that parses its source with the shared PDF parser held by {@link Tika},
 * filtering embedded documents with the PdfBox document selector.
 */
@Component
public class PdfBoxTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#pdfParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // pdfParser is a static member, so it is referenced through the class
        return Tika.pdfParser;
    }

    /**
     * @return the {@link Tika#pdfBoxEmbededDocumentSelector} instance
     */
    @Override
    protected DocumentSelector getDocumentSelector()
    {
        return Tika.pdfBoxEmbededDocumentSelector;
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Transformer that parses its source with the shared {@code TikaOfficeDetectParser}
 * held by {@link Tika}.
 */
@Component
public class PoiTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#tikaOfficeDetectParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // tikaOfficeDetectParser is a static member, so it is referenced through the class
        return Tika.tikaOfficeDetectParser;
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Transformer registered under the TextMining name; it parses its source with the
 * shared office parser held by {@link Tika}.
 */
@Component
public class TextMiningTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@link Tika#officeParser} instance
     */
    @Override
    protected Parser getParser()
    {
        // officeParser is a static member, so it is referenced through the class
        return Tika.officeParser;
    }
}

View File

@@ -0,0 +1,446 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import com.google.common.collect.ImmutableList;
import org.alfresco.transform.tika.parsers.TikaOfficeDetectParser;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.parser.pkg.PackageParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.slf4j.Logger;
import org.springframework.stereotype.Component;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.util.List;
import java.util.regex.Pattern;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_CSV;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XHTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
@Component
public class Tika
{
public static final String ARCHIVE = "Archive";
public static final String OUTLOOK_MSG = "OutlookMsg";
public static final String PDF_BOX = "PdfBox";
public static final String OFFICE = "Office";
public static final String POI = "Poi";
public static final String OOXML = "OOXML";
public static final String TIKA_AUTO = "TikaAuto";
public static final String TEXT_MINING = "TextMining";
public static final String TARGET_MIMETYPE = "--targetMimetype=";
public static final String TARGET_ENCODING = "--targetEncoding=";
public static final String INCLUDE_CONTENTS = "--includeContents";
public static final String NOT_EXTRACT_BOOKMARKS_TEXT = "--notExtractBookmarksText";
public static final String CSV = "csv";
public static final String DOC = "doc";
public static final String DOCX = "docx";
public static final String HTML = "html";
public static final String MSG = "msg";
public static final String PDF = "pdf";
public static final String PPTX = "pptx";
public static final String TXT = "txt";
public static final String XHTML = "xhtml";
public static final String XSLX = "xslx";
public static final String XML = "xml";
public static final String ZIP = "zip";
public static final Parser packageParser = new PackageParser();
public static final Parser pdfParser = new PDFParser();
public static final Parser officeParser = new OfficeParser();
public final Parser autoDetectParser;
public static final Parser ooXmlParser = new OOXMLParser();
public static final Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
public final PDFParserConfig pdfParserConfig = new PDFParserConfig();
/**
 * Selects every embedded document except those whose content type is exactly one of the
 * JPEG, TIFF or PNG image mimetypes. Documents without a content type are selected.
 */
public static final DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
{
    private final List<String> disabledMediaTypes = ImmutableList.of(MIMETYPE_IMAGE_JPEG,
            MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);

    @Override
    public boolean select(Metadata metadata)
    {
        final String contentType = metadata.get(Metadata.CONTENT_TYPE);
        final boolean hasContentType = contentType != null && !contentType.isEmpty();
        // disabledMediaTypes is a final field that is always initialised, so it needs no null check
        return !hasContentType || !disabledMediaTypes.contains(contentType);
    }
};
public Tika() throws TikaException, IOException, SAXException
{
TikaConfig tikaConfig = readTikaConfig();
autoDetectParser = new AutoDetectParser(tikaConfig);
}
public static TikaConfig readTikaConfig(Logger logger)
{
try
{
return readTikaConfig();
}
catch (Exception e)
{
logger.error("Failed to read tika-config.xml", e);
return null;
}
}
private static TikaConfig readTikaConfig() throws TikaException, IOException, SAXException
{
ClassLoader classLoader = Tika.class.getClassLoader();
URL tikaConfigXml = classLoader.getResource("tika-config.xml");
return new TikaConfig(tikaConfigXml);
}
// Extracts parameters form args
public void transform(Parser parser, DocumentSelector documentSelector, String[] args)
{
String transform = null;
String targetMimetype = null;
String targetEncoding = null;
String sourceFilename = null;
String targetFilename = null;
Boolean includeContents = null;
Boolean notExtractBookmarksText = null;
for (String arg : args)
{
if (arg.startsWith("--"))
{
if (INCLUDE_CONTENTS.startsWith(arg))
{
getValue(arg, false, includeContents, INCLUDE_CONTENTS);
includeContents = true;
}
else if (arg.startsWith(TARGET_ENCODING))
{
targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
}
else if (arg.startsWith(TARGET_MIMETYPE))
{
targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
}
else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT))
{
getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
notExtractBookmarksText = true;
}
else
{
throw new IllegalArgumentException("Unexpected argument " + arg);
}
}
else
{
if (transform == null)
{
transform = arg;
}
else if (sourceFilename == null)
{
sourceFilename = arg;
}
else if (targetFilename == null)
{
targetFilename = arg;
}
else
{
throw new IllegalArgumentException("Unexpected argument " + arg);
}
}
}
if (targetFilename == null)
{
throw new IllegalArgumentException("Missing arguments");
}
includeContents = includeContents == null ? false : includeContents;
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
transform(parser, documentSelector, includeContents, notExtractBookmarksText, sourceFilename,
targetFilename, targetMimetype, targetEncoding);
}
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
{
if (value != null)
{
throw new IllegalArgumentException("Duplicate " + optionName);
}
String stringValue = arg.substring(optionName.length()).trim();
if (!valueExpected && stringValue.length() > 0)
{
throw new IllegalArgumentException("Unexpected value with " + optionName);
}
if (valueExpected && stringValue.length() == 0)
{
throw new IllegalArgumentException("Expected value with " + optionName);
}
return stringValue;
}
private void transform(Parser parser, DocumentSelector documentSelector,
Boolean includeContents,
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
{
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
OutputStream os = new FileOutputStream(targetFilename);
Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
{
Metadata metadata = new Metadata();
ParseContext context = buildParseContext(documentSelector, includeContents,
notExtractBookmarksText);
ContentHandler handler = getContentHandler(targetMimetype, ow);
parser.parse(is, handler, metadata, context);
}
catch (SAXException | TikaException | IOException e)
{
throw new IllegalStateException(e.getMessage(), e);
}
}
private ContentHandler getContentHandler(String targetMimetype, Writer output)
{
try
{
ContentHandler handler;
if (MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
{
handler = new BodyContentHandler(output);
}
else
{
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
TransformerHandler transformerHandler;
transformerHandler = factory.newTransformerHandler();
transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
transformerHandler.setResult(new StreamResult(output));
handler = transformerHandler;
if (MIMETYPE_HTML.equals(targetMimetype))
{
transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, HTML);
return new ExpandedTitleContentHandler(transformerHandler);
}
else if (MIMETYPE_XHTML.equals(targetMimetype) ||
MIMETYPE_XML.equals(targetMimetype))
{
transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, XML);
}
else if (MIMETYPE_TEXT_CSV.equals(targetMimetype))
{
handler = new CsvContentHandler(output);
}
else
{
throw new IllegalArgumentException("Invalid target mimetype " + targetMimetype);
}
}
return handler;
}
catch (TransformerConfigurationException e)
{
throw new IllegalStateException(e.getMessage(), e);
}
}
/**
* A wrapper around the normal Tika BodyContentHandler for CSV rather encoding than tab separated.
*/
protected static class CsvContentHandler extends BodyContentHandler
{
private static final char[] comma = new char[]{','};
private static final Pattern all_nums = Pattern.compile("[\\d\\.\\-\\+]+");
private boolean inCell = false;
private boolean needsComma = false;
protected CsvContentHandler(Writer output)
{
super(output);
}
@Override
public void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException
{
if (length == 1 && ch[0] == '\t')
{
// Ignore tabs, as they mess up the CSV output
}
else
{
super.ignorableWhitespace(ch, start, length);
}
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException
{
if (inCell)
{
StringBuffer t = new StringBuffer(new String(ch, start, length));
// Quote if not all numbers
if (all_nums.matcher(t).matches())
{
super.characters(ch, start, length);
}
else
{
for (int i = t.length() - 1; i >= 0; i--)
{
if (t.charAt(i) == '\"')
{
// Double up double quotes
t.insert(i, '\"');
i--;
}
}
t.insert(0, '\"');
t.append('\"');
char[] c = t.toString().toCharArray();
super.characters(c, 0, c.length);
}
}
else
{
super.characters(ch, start, length);
}
}
@Override
public void startElement(String uri, String localName, String name,
Attributes atts) throws SAXException
{
if (localName.equals("td"))
{
inCell = true;
if (needsComma)
{
super.characters(comma, 0, 1);
needsComma = true;
}
}
else
{
super.startElement(uri, localName, name, atts);
}
}
@Override
public void endElement(String uri, String localName, String name)
throws SAXException
{
if (localName.equals("td"))
{
needsComma = true;
inCell = false;
}
else
{
if (localName.equals("tr"))
{
needsComma = false;
}
super.endElement(uri, localName, name);
}
}
}
private ParseContext buildParseContext(DocumentSelector documentSelector,
Boolean includeContents, Boolean notExtractBookmarksText)
{
ParseContext context = new ParseContext();
if (documentSelector != null)
{
context.set(DocumentSelector.class, documentSelector);
}
if (notExtractBookmarksText.equals(true))
{
pdfParserConfig.setExtractBookmarksText(false);
// pdfParserConfig is set to override default settings
context.set(PDFParserConfig.class, pdfParserConfig);
}
// If Archive transform
if (includeContents != null)
{
context.set(Parser.class, includeContents ? autoDetectParser : new EmptyParser());
}
return context;
}
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Tika transformer that parses with the auto detect parser.
 */
@Component
public class TikaAutoTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@code autoDetectParser} of the {@code tika} instance (which is not declared in this class)
     */
    @Override
    protected Parser getParser()
    {
        final Parser autoDetect = tika.autoDetectParser;
        return autoDetect;
    }
}

View File

@@ -0,0 +1,12 @@
#
# DWGMetadataExtracter - default mapping
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -0,0 +1,141 @@
#
# IPTCMetadataExtracter - default mapping
#
# author: David Edwards
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# IPTC
namespace.prefix.iptcxmp=http://www.alfresco.org/model/content/metadata/IPTCXMP/1.0
namespace.prefix.dc=http://purl.org/dc/elements/1.1/
namespace.prefix.Iptc4xmpCore=http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/
namespace.prefix.Iptc4xmpExt=http://iptc.org/std/Iptc4xmpExt/2008-02-29/
namespace.prefix.photoshop=http://ns.adobe.com/photoshop/1.0/
namespace.prefix.plus=http://ns.useplus.org/ldf/xmp/1.0/
namespace.prefix.xmpRights=http://ns.adobe.com/xap/1.0/rights/
namespace.prefix.stDim=http://ns.adobe.com/xap/1.0/sType/Dimensions
# Exif
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
# Mappings from TikaAutoExtractor
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal
# IPTC Mappings
XMP-dc\:Description=dc:description
XMP-dc\:Subject=dc:subject
XMP-dc\:Creator=dc:creator
XMP-dc\:Rights=dc:rights
XMP-dc\:Title=dc:title
XMP-iptcCore\:CreatorCountry=Iptc4xmpCore:CiAdrCtry
XMP-iptcCore\:CountryCode=Iptc4xmpCore:CountryCode
XMP-iptcCore\:CreatorAddress=Iptc4xmpCore:CiAdrExtadr
XMP-iptcCore\:CreatorCity=Iptc4xmpCore:CiAdrCity
XMP-iptcCore\:CreatorPostalCode=Iptc4xmpCore:CiAdrPcode
XMP-iptcCore\:CreatorRegion=Iptc4xmpCore:CiAdrRegion
XMP-iptcCore\:CreatorWorkEmail=Iptc4xmpCore:CiEmailWork
XMP-iptcCore\:CreatorWorkTelephone=Iptc4xmpCore:CiTelWork
XMP-iptcCore\:CreatorWorkURL=Iptc4xmpCore:CiUrlWork
XMP-iptcCore\:IntellectualGenre=Iptc4xmpCore:IntellectualGenre
XMP-iptcCore\:Location=Iptc4xmpCore:Location
XMP-iptcCore\:Scene=Iptc4xmpCore:Scene
XMP-iptcCore\:SubjectCode=Iptc4xmpCore:SubjectCode
XMP-photoshop\:AuthorsPosition=photoshop:AuthorsPosition
XMP-photoshop\:CaptionWriter=photoshop:CaptionWriter
XMP-photoshop\:Category=photoshop:Category
XMP-photoshop\:City=photoshop:City
XMP-photoshop\:Country=photoshop:Country
XMP-photoshop\:Credit=photoshop:Credit
XMP-photoshop\:DateCreated=photoshop:DateCreated
XMP-photoshop\:Headline=photoshop:Headline
XMP-photoshop\:Instructions=photoshop:Instructions
XMP-photoshop\:Source=photoshop:Source
XMP-photoshop\:State=photoshop:State
XMP-photoshop\:SupplementalCategories=photoshop:SupplementalCategories
XMP-photoshop\:TransmissionReference=photoshop:TransmissionReference
XMP-photoshop\:Urgency=photoshop:Urgency
XMP-xmpRights\:UsageTerms=xmpRights:UsageTerms
XMP-iptcExt\:AdditionalModelInformation=Iptc4xmpExt:AddlModelInfo
XMP-iptcExt\:ArtworkCopyrightNotice=Iptc4xmpExt:AOCopyrightNotice
XMP-iptcExt\:ArtworkCreator=Iptc4xmpExt:AOCreator
XMP-iptcExt\:ArtworkDateCreated=Iptc4xmpExt:AODateCreated
XMP-iptcExt\:ArtworkSource=Iptc4xmpExt:AOSource
XMP-iptcExt\:ArtworkSourceInventoryNo=Iptc4xmpExt:AOSourceInvNo
XMP-iptcExt\:ArtworkTitle=Iptc4xmpExt:AOTitle
XMP-iptcExt\:ControlledVocabularyTerm=Iptc4xmpExt:CVterm
XMP-iptcExt\:DigitalImageGUID=Iptc4xmpExt:DigImageGUID
XMP-iptcExt\:DigitalSourceFileType=Iptc4xmpExt:DigitalSourcefileType
XMP-iptcExt\:DigitalSourceType=Iptc4xmpExt:DigitalSourceType
XMP-iptcExt\:Event=Iptc4xmpExt:Event
XMP-iptcExt\:IPTCLastEdited=Iptc4xmpExt:IptcLastEdited
XMP-iptcExt\:LocationCreatedCity=Iptc4xmpExt:LocationCreatedCity
XMP-iptcExt\:LocationCreatedCountryCode=Iptc4xmpExt:LocationCreatedCountryCode
XMP-iptcExt\:LocationCreatedCountryName=Iptc4xmpExt:LocationCreatedCountryName
XMP-iptcExt\:LocationCreatedProvinceState=Iptc4xmpExt:LocationCreatedProvinceState
XMP-iptcExt\:LocationCreatedSublocation=Iptc4xmpExt:LocationCreatedSublocation
XMP-iptcExt\:LocationCreatedWorldRegion=Iptc4xmpExt:LocationCreatedWorldRegion
XMP-iptcExt\:LocationShownCity=Iptc4xmpExt:LocationShownCity
XMP-iptcExt\:LocationShownCountryCode=Iptc4xmpExt:LocationShownCountryCode
XMP-iptcExt\:LocationShownCountryName=Iptc4xmpExt:LocationShownCountryName
XMP-iptcExt\:LocationShownProvinceState=Iptc4xmpExt:LocationShownProvinceState
XMP-iptcExt\:LocationShownSublocation=Iptc4xmpExt:LocationShownSublocation
XMP-iptcExt\:LocationShownWorldRegion=Iptc4xmpExt:LocationShownWorldRegion
XMP-iptcExt\:MaxAvailHeight=Iptc4xmpExt:MaxAvailHeight
XMP-iptcExt\:MaxAvailWidth=Iptc4xmpExt:MaxAvailWidth
XMP-iptcExt\:ModelAge=Iptc4xmpExt:ModelAge
XMP-iptcExt\:OrganisationInImageCode=Iptc4xmpExt:OrganisationInImageCode
XMP-iptcExt\:OrganisationInImageName=Iptc4xmpExt:OrganisationInImageName
XMP-iptcExt\:PersonInImage=Iptc4xmpExt:PersonInImage
XMP-iptcExt\:RegistryItemID=Iptc4xmpExt:RegItemId
XMP-iptcExt\:RegistryOrganisationID=Iptc4xmpExt:RegOrgId
XMP-plus\:CopyrightOwnerID=plus:CopyrightOwnerID
XMP-plus\:CopyrightOwnerName=plus:CopyrightOwnerName
XMP-plus\:ImageCreatorID=plus:ImageCreatorID
XMP-plus\:ImageCreatorName=plus:ImageCreatorName
XMP-plus\:ImageSupplierID=plus:ImageSupplierID
XMP-plus\:ImageSupplierImageID=plus:ImageSupplierImageID
XMP-plus\:ImageSupplierName=plus:ImageSupplierName
XMP-plus\:LicensorCity=plus:LicensorCity
XMP-plus\:LicensorCountry=plus:LicensorCountry
XMP-plus\:LicensorEmail=plus:LicensorEmail
XMP-plus\:LicensorExtendedAddress=plus:LicensorExtendedAddress
XMP-plus\:LicensorID=plus:LicensorID
XMP-plus\:LicensorName=plus:LicensorName
XMP-plus\:LicensorPostalCode=plus:LicensorPostalCode
XMP-plus\:LicensorRegion=plus:LicensorRegion
XMP-plus\:LicensorStreetAddress=plus:LicensorStreetAddress
XMP-plus\:LicensorTelephone1=plus:LicensorTelephone1
XMP-plus\:LicensorTelephone2=plus:LicensorTelephone2
XMP-plus\:LicensorURL=plus:LicensorURL
XMP-plus\:MinorModelAgeDisclosure=plus:MinorModelAgeDisclosure
XMP-plus\:ModelReleaseID=plus:ModelReleaseID
XMP-plus\:ModelReleaseStatus=plus:ModelReleaseStatus
XMP-plus\:PLUSVersion=plus:Version
XMP-plus\:PropertyReleaseID=plus:PropertyReleaseID
XMP-plus\:PropertyReleaseStatus=plus:PropertyReleaseStatus
stDim\:unit=stDim:unit

View File

@@ -0,0 +1,30 @@
#
# MP3MetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -0,0 +1,14 @@
#
# MailMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
sentDate=cm:sentdate
originator=cm:originator, cm:author
addressee=cm:addressee
addressees=cm:addressees
subjectLine=cm:subjectline, cm:description

View File

@@ -0,0 +1,14 @@
#
# OfficeMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
createDateTime=cm:created
lastSaveDateTime=cm:modified

View File

@@ -0,0 +1,21 @@
#
# OpenDocumentMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title

View File

@@ -0,0 +1,13 @@
#
# PdfBoxMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
created=cm:created

View File

@@ -0,0 +1,13 @@
#
# PoiMetadataExtracter - default mapping
#
# author: Neil McErlean
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created

View File

@@ -0,0 +1,34 @@
#
# TikaAudioMetadataExtracter - audio mapping
#
# This is used to map from the Tika audio metadata onto your
# content model. This will be used for any Audio content
# for which an explicit extractor isn't defined
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -0,0 +1,52 @@
#
# TikaAutoMetadataExtracter - default mapping
#
# This is used to map from the Tika and standard namespaces
# onto your content model. This will be used for any
# content for which an explicit extractor isn't defined,
# by using Tika's auto-selection facilities.
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -0,0 +1,8 @@
queue:
engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:org.alfresco.transform.engine.tika.acs}
transform:
core:
version: @project.version@
tika:
pdfBox:
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}

View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,127 @@
The "Artistic License"
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to make
reasonable modifications.
Definitions:
"Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
"Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes
of the Copyright Holder as specified below.
"Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
"You" is you, if you're thinking about copying or distributing
this Package.
"Reasonable copying fee" is whatever you can justify on the
basis of media cost, duplication charges, time of people involved,
and so on. (You will not be required to justify it to the
Copyright Holder, but only to the computing community at large
as a market that must bear the fee.)
"Freely Available" means that no fee is charged for the item
itself, though there may be fees involved in handling the item.
It also means that recipients of the item may redistribute it
under the same conditions they received it.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain or from the Copyright Holder. A Package
modified in such a way shall still be considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided
that you insert a prominent notice in each changed file stating how and
when you changed that file, and provided that you do at least ONE of the
following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or
an equivalent medium, or placing the modifications on a major archive
site such as uunet.uu.net, or by allowing the Copyright Holder to include
your modifications in the Standard Version of the Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict
with standard executables, which must also be provided, and provide
a separate manual page for each non-standard executable that clearly
documents how it differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where
to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) give non-standard executables non-standard names, and clearly
document the differences in manual pages (or equivalent), together
with instructions on where to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of this
Package. You may charge any fee you choose for support of this
Package. You may not charge a fee for this Package itself. However,
you may distribute this Package in aggregate with other (possibly
commercial) programs as part of a larger (possibly commercial) software
distribution provided that you do not advertise this Package as a
product of your own. You may embed this Package's interpreter within
an executable of yours (by linking); this shall be construed as a mere
form of aggregation, provided that the complete Standard Version of the
interpreter is so embedded.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whoever generated
them, and may be sold commercially, and may be aggregated with this
Package. If such scripts or library files are aggregated with this
Package via the so-called "undump" or "unexec" methods of producing a
binary executable image, then distribution of such an image shall
neither be construed as a distribution of this Package nor shall it
fall under the restrictions of Paragraphs 3 and 4, provided that you do
not represent such an executable image as a Standard Version of this
Package.
7. C subroutines (or comparably compiled subroutines in other
languages) supplied by you and linked into this Package in order to
emulate subroutines and variables of the language defined by this
Package shall not be considered part of this Package, but are the
equivalent of input as in Paragraph 6, provided these subroutines do
not change the language in any way that would cause it to fail the
regression tests for the language.
8. Aggregation of this Package with a commercial distribution is always
permitted provided that the use of this Package is embedded; that is,
when no overt attempt is made to make this Package's interfaces visible
to the end user of the commercial distribution. Such use shall not be
construed as a distribution of this Package.
9. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End

View File

@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<external-parsers>
<parser>
<check>
<command>exiftool -ver</command>
<error-codes>126,127</error-codes>
</check>
<command>env FOO=${OUTPUT} exiftool -args -G1 -sep "|||" ${INPUT}</command>
<mime-types>
<mime-type>image/x-raw-hasselblad</mime-type>
<mime-type>image/x-raw-sony</mime-type>
<mime-type>image/x-raw-canon</mime-type>
<mime-type>image/x-raw-adobe</mime-type>
<mime-type>image/gif</mime-type>
<mime-type>image/jp2</mime-type>
<mime-type>image/jpeg</mime-type>
<mime-type>image/x-raw-kodak</mime-type>
<mime-type>image/x-raw-minolta</mime-type>
<mime-type>image/x-raw-nikon</mime-type>
<mime-type>image/x-raw-olympus</mime-type>
<mime-type>image/x-raw-pentax</mime-type>
<mime-type>image/png</mime-type>
<mime-type>image/x-raw-fuji</mime-type>
<mime-type>image/x-raw-panasonic</mime-type>
<mime-type>image/tiff</mime-type>
<mime-type>image/webp</mime-type>
</mime-types>
<metadata>
<!-- Default output-->
<match>\s*([A-Za-z0-9/ \(\)]+\S{1})\s+:\s+([A-Za-z0-9\(\)\[\] \:\-\.]+)\s*</match>
<!-- args format-->
<match>^-([\S]+)\=(.*)</match>
</metadata>
</parser>
</external-parsers>

Binary file not shown.

View File

@@ -0,0 +1,28 @@
<html xmlns:th="http://www.thymeleaf.org">
<body>
<div>
<h2>Tika Test Transformations</h2>
<form method="POST" enctype="multipart/form-data" action="/transform">
<table>
<tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
<tr><td><div style="text-align:right">Direct Url</div></td><td><input type="text" name="directAccessUrl"/></td></tr>
<tr><td><div style="text-align:right">sourceMimetype *</div></td><td><input type="text" name="sourceMimetype" value="application/msword" /></td></tr>
<tr><td><div style="text-align:right">targetExtension *</div></td><td><input type="text" name="targetExtension" value="txt" /></td></tr>
<tr><td><div style="text-align:right">targetMimetype *</div></td><td><input type="text" name="targetMimetype" value="text/plain" /></td></tr>
<tr><td><div style="text-align:right">targetEncoding *</div></td><td><input type="text" name="targetEncoding" value="UTF-8" /></td></tr>
<tr><td><div style="text-align:right">includeContents (archive) *</div></td><td><input type="checkbox" name="includeContents" value="true" /></td></tr>
<tr><td><div style="text-align:right">timeout</div></td><td><input type="text" name="timeout" value="" /></td></tr>
<tr><td><div style="text-align:right">testDelay</div></td><td><input type="text" name="testDelay" value="" /></td></tr>
<tr><td><div style="text-align:right">notExtractBookmarksText</div></td><td><input type="checkbox" name="notExtractBookmarksText" value="true" /></td></tr>
<tr><td></td><td><input type="submit" value="Transform" /></td></tr>
</table>
</form>
</div>
<div>
<a href="/log">Log entries</a>
</div>
</body>
</html>

View File

@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<properties>
<!-- This property, when set, hides the Tika start-up warnings about missing libraries. -->
<!-- See https://issues.apache.org/jira/browse/TIKA-2490 -->
<service-loader initializableProblemHandler="ignore"/>
<parsers>
<!-- ATS-816: Use the PackageParser for application/vnd.apple.keynote.13 as that was used in tika-1.21-20190624-alfresco-patched -->
<parser class="org.apache.tika.parser.pkg.PackageParser">
<mime>application/vnd.apple.keynote.13</mime>
</parser>
<!-- Default parser needs to be included if the PackageParser parser is specified here, otherwise just the PackageParser is added-->
<parser class="org.apache.tika.parser.DefaultParser"/>
</parsers>
</properties>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,647 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import org.alfresco.transform.base.AbstractTransformControllerTest;
import org.alfresco.transform.base.TransformController;
import org.alfresco.transform.base.executors.RuntimeExec;
import org.alfresco.transform.base.model.FileRefEntity;
import org.alfresco.transform.base.model.FileRefResponse;
import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import org.springframework.test.web.servlet.result.MockMvcResultMatchers;
import javax.servlet.http.HttpServletRequest;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.UUID;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_METADATA_EMBED;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_PRESENTATION;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_SPREADSHEET;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_CSV;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_WORD;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XHTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_ZIP;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
import static org.alfresco.transform.common.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transform.common.RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transform.tika.transformers.Tika.ARCHIVE;
import static org.alfresco.transform.tika.transformers.Tika.CSV;
import static org.alfresco.transform.tika.transformers.Tika.DOC;
import static org.alfresco.transform.tika.transformers.Tika.DOCX;
import static org.alfresco.transform.tika.transformers.Tika.HTML;
import static org.alfresco.transform.tika.transformers.Tika.MSG;
import static org.alfresco.transform.tika.transformers.Tika.OFFICE;
import static org.alfresco.transform.tika.transformers.Tika.OOXML;
import static org.alfresco.transform.tika.transformers.Tika.OUTLOOK_MSG;
import static org.alfresco.transform.tika.transformers.Tika.PDF;
import static org.alfresco.transform.tika.transformers.Tika.PDF_BOX;
import static org.alfresco.transform.tika.transformers.Tika.POI;
import static org.alfresco.transform.tika.transformers.Tika.PPTX;
import static org.alfresco.transform.tika.transformers.Tika.TEXT_MINING;
import static org.alfresco.transform.tika.transformers.Tika.TIKA_AUTO;
import static org.alfresco.transform.tika.transformers.Tika.TXT;
import static org.alfresco.transform.tika.transformers.Tika.XHTML;
import static org.alfresco.transform.tika.transformers.Tika.XML;
import static org.alfresco.transform.tika.transformers.Tika.XSLX;
import static org.alfresco.transform.tika.transformers.Tika.ZIP;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.when;
import static org.springframework.http.HttpHeaders.ACCEPT;
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
import static org.springframework.http.HttpStatus.CREATED;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
import static org.springframework.http.MediaType.APPLICATION_PDF_VALUE;
import static org.springframework.http.MediaType.TEXT_PLAIN_VALUE;
import static org.springframework.util.StringUtils.getFilenameExtension;
/**
* Test the TikaController without a server.
* Super class includes tests for the TransformController.
*/
@WebMvcTest()
public class TikaControllerTest extends AbstractTransformControllerTest
{
private static final String ENGINE_CONFIG_NAME = "tika_engine_config.json";
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
"\n" +
"The quick brown fox jumps over the lazy dogs";
private static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
@Mock
private RuntimeExec.ExecutionResult mockExecutionResult;
@Mock
private RuntimeExec mockTransformCommand;
@Mock
private RuntimeExec mockCheckCommand;
private String targetEncoding = "UTF-8";
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
@BeforeEach
public void before()
{
sourceExtension = "pdf";
targetExtension = "txt";
sourceMimetype = MIMETYPE_PDF;
targetMimetype = MIMETYPE_TEXT_PLAIN;
}
@Override
public String getEngineConfigName()
{
return ENGINE_CONFIG_NAME;
}
@Override
protected void mockTransformCommand(String sourceExtension,
String targetExtension, String sourceMimetype,
boolean readTargetFileBytes) throws IOException
{
this.sourceExtension = sourceExtension;
this.targetExtension = targetExtension;
this.sourceMimetype = sourceMimetype;
expectedOptions = null;
expectedSourceSuffix = null;
expectedSourceFileBytes = readTestFile(sourceExtension);
expectedTargetFileBytes = readTargetFileBytes ? readTestFile(targetExtension) : null;
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype,
expectedSourceFileBytes);
when(mockTransformCommand.execute(any(), anyLong())).thenAnswer(
(Answer<RuntimeExec.ExecutionResult>) invocation -> {
Map<String, String> actualProperties = invocation.getArgument(0);
assertEquals(3, actualProperties.size(),"There should be 3 properties");
String actualOptions = actualProperties.get("options");
String actualSource = actualProperties.get("source");
String actualTarget = actualProperties.get("target");
String actualTargetExtension = getFilenameExtension(actualTarget);
assertNotNull(actualSource);
assertNotNull(actualTarget);
if (expectedSourceSuffix != null)
{
assertTrue(actualSource.endsWith(expectedSourceSuffix),
"The source file \"" + actualSource + "\" should have ended in \"" + expectedSourceSuffix + "\"");
actualSource = actualSource.substring(0,
actualSource.length() - expectedSourceSuffix.length());
}
assertNotNull(actualOptions);
if (expectedOptions != null)
{
Assertions.assertEquals(expectedOptions, actualOptions, "expectedOptions");
}
Long actualTimeout = invocation.getArgument(1);
assertNotNull(actualTimeout);
if (expectedTimeout != null)
{
Assertions.assertEquals(expectedTimeout, actualTimeout, "expectedTimeout");
}
// Copy a test file into the target file location if it exists
int i = actualTarget.lastIndexOf('_');
if (i >= 0)
{
String testFilename = actualTarget.substring(i + 1);
File testFile = getTestFile(testFilename, false);
File targetFile = new File(actualTarget);
generateTargetFileFromResourceFile(actualTargetExtension, testFile,
targetFile);
}
// Check the supplied source file has not been changed.
byte[] actualSourceFileBytes = readAllBytes(new File(actualSource).toPath());
Assertions.assertArrayEquals(expectedSourceFileBytes, actualSourceFileBytes,
"Source file is not the same");
return mockExecutionResult;
});
when(mockExecutionResult.getExitValue()).thenReturn(0);
when(mockExecutionResult.getStdErr()).thenReturn("STDERROR");
when(mockExecutionResult.getStdOut()).thenReturn("STDOUT");
}
private void transform(String transform, String sourceExtension, String targetExtension,
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
{
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
this.targetMimetype = targetMimetype;
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
MockHttpServletRequestBuilder requestBuilder = includeContents == null
? mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
"targetExtension", this.targetExtension)
: mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
"targetExtension", this.targetExtension, INCLUDE_CONTENTS, includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(MockMvcResultMatchers.status().is(OK.value()))
.andExpect(MockMvcResultMatchers.header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + this.targetExtension)).
andReturn();
String content = result.getResponse().getContentAsString();
assertTrue(content.contains(expectedContentContains),
"The content did not include \"" + expectedContentContains);
}
@Override
// Add extra required parameters to the request.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
return super.mockMvcRequest(url, sourceFile, params)
.param("targetEncoding", targetEncoding)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
}
@Mock
HttpServletRequest httpServletRequest;
@Test
public void testImmutableEmptyMap()
{
// See ACS-373
TransformController controller = getController();
ProbeTestTransform probeTestTransform = getProbeTestTransform();
ReflectionTestUtils.setField(probeTestTransform, "livenessTransformEnabled", true);
probeTestTransform.doTransformOrNothing(httpServletRequest, true, controller);
}
@Test
@Override
public void simpleTransformTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.simpleTransformTest();
}
@Test
@Override
public void testDelayTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.testDelayTest();
}
@Test
@Override
public void noTargetFileTest()
{
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
}
// --- Super class tests (need modified setup) ---
@Test
@Override
public void dotDotSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.dotDotSourceFilenameTest();
}
@Test
@Override
public void noExtensionSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.noExtensionSourceFilenameTest();
}
@Test
@Override
public void badSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.badSourceFilenameTest();
}
@Test
@Override
public void blankSourceFilenameTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.blankSourceFilenameTest();
}
@Test
@Override
public void noTargetExtensionTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.noTargetExtensionTest();
}
@Test
@Override
public void calculateMaxTime() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.calculateMaxTime();
}
// --- General Tika tests ---
@Test
public void badEncodingTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
targetEncoding = "rubbish";
mockMvc.perform(
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension))
.andExpect(MockMvcResultMatchers.status().is(INTERNAL_SERVER_ERROR.value()));
}
// --- Archive ---
@Test
public void zipToTextArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n");
}
@Test
public void zipToTextIncludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
"quick.html\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog\n" +
"\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog" +
"\n" +
"\n");
}
@Test
public void zipToTextExcludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
false, "\n" +
"folder/subfolder/quick.jpg\n" +
"\n" +
"\n" +
"quick.doc\n" +
"\n" +
"\n" +
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"quick.txt\n" +
"\n" +
"\n" +
"quick.xml\n" +
"\n");
}
// --- OutlookMsg ---
@Test
public void msgToTxtOutlookMsgTest() throws Exception
{
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
// --- PdfBox ---
@Test
public void pdfToTxtPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pdfToCsvPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
}
@Test
public void pdfToXmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
@Test
public void pdfToXhtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
EXPECTED_XHTML_CONTENT_CONTAINS);
}
@Test
public void pdfToHtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
// --- Office ---
@Test
public void msgToTxtOfficeTest() throws Exception
{
transform(OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
@Test
public void docToTxtOfficeTest() throws Exception
{
transform(OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- Poi ---
@Test
public void xslxToCsvPoiTest() throws Exception
{
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
EXPECTED_CSV_CONTENT_CONTAINS);
}
// --- OOXML ---
@Test
public void docxToTxtOoXmlTest() throws Exception
{
transform(OOXML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pptxToTxtOoXmlTest() throws Exception
{
transform(OOXML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TikaAuto ---
@Test
public void ppxtToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void doctToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TextMining ---
@Test
public void docToTxtTextMiningTest() throws Exception
{
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void xlsxEmbedTest() throws Exception
{
mockTransformCommand(XSLX, XSLX, MIMETYPE_OPENXML_SPREADSHEET, false);
String metadata =
"{\"{http://www.alfresco.org/model/content/1.0}author\":\"author1\"," +
"\"{http://www.alfresco.org/model/content/1.0}title\":\"title1\"," +
"\"{http://www.alfresco.org/model/content/1.0}description\":[\"desc1\",\"desc2\"]," +
"\"{http://www.alfresco.org/model/content/1.0}created\":\"created1\"}";
MockHttpServletRequestBuilder requestBuilder =
super.mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
"targetExtension", XSLX,
"metadata", metadata,
"targetMimetype", MIMETYPE_METADATA_EMBED,
"sourceMimetype", MIMETYPE_OPENXML_SPREADSHEET);
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(MockMvcResultMatchers.status().is(OK.value()))
.andExpect(MockMvcResultMatchers.header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension)).
andReturn();
byte[] bytes = result.getResponse().getContentAsByteArray();
ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes);
XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
POIXMLProperties props = workbook.getProperties();
POIXMLProperties.CoreProperties coreProp = props.getCoreProperties();
POIXMLProperties.CustomProperties custProp = props.getCustomProperties();
assertEquals("author1", coreProp.getCreator());
assertEquals("title1", coreProp.getTitle());
assertEquals("desc1, desc2", coreProp.getDescription()); // multi value
assertEquals("created1", custProp.getProperty("created").getLpwstr());
}
@Test
public void pdfToTxtExtractBookmarksTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
mockMvc.perform(
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension).param(
NOT_EXTRACT_BOOKMARKS_TEXT, "true"))
.andExpect(MockMvcResultMatchers.status().is(OK.value()))
.andExpect(MockMvcResultMatchers.header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension));
}
@Override
protected void updateTransformRequestWithSpecificOptions(TransformRequest transformRequest)
{
transformRequest.setSourceExtension(sourceExtension);
transformRequest.setTargetExtension(targetExtension);
transformRequest.setSourceMediaType(APPLICATION_PDF_VALUE);
transformRequest.setTargetMediaType(TEXT_PLAIN_VALUE);
transformRequest.getTransformRequestOptions().put("targetEncoding", "UTF-8");
}
@Test
public void testPojoTransform() throws Exception
{
// Files
String sourceFileRef = UUID.randomUUID().toString();
File sourceFile = getTestFile("quick." + sourceExtension, true);
String targetFileRef = UUID.randomUUID().toString();
TransformRequest transformRequest = createTransformRequest(sourceFileRef, sourceFile);
// HTTP Request
HttpHeaders headers = new HttpHeaders();
headers.set(CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
sourceFile), headers, OK);
when(alfrescoSharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
when(alfrescoSharedFileStoreClient.saveFile(any()))
.thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
when(mockExecutionResult.getExitValue()).thenReturn(0);
// Update the Transformation Request with any specific params before sending it
updateTransformRequestWithSpecificOptions(transformRequest);
// Serialize and call the transformer
String tr = objectMapper.writeValueAsString(transformRequest);
String transformationReplyAsString = mockMvc
.perform(MockMvcRequestBuilders
.post(ENDPOINT_TRANSFORM)
.header(ACCEPT, APPLICATION_JSON_VALUE)
.header(CONTENT_TYPE, APPLICATION_JSON_VALUE)
.content(tr))
.andExpect(MockMvcResultMatchers.status().is(CREATED.value()))
.andReturn().getResponse().getContentAsString();
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
TransformReply.class);
// Assert the reply
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
assertEquals(transformRequest.getClientData(), transformReply.getClientData());
assertEquals(transformRequest.getSchema(), transformReply.getSchema());
}
@Test
@Override
public void httpTransformRequestUsingDirectAccessUrlTest() throws Exception
{
expectedTargetFileBytes = readTestFile(targetExtension);
super.httpTransformRequestUsingDirectAccessUrlTest();
}
}

View File

@@ -0,0 +1,69 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import org.alfresco.transform.base.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
import org.springframework.util.LinkedMultiValueMap;
/**
 * HTTP request tests for the Tika transformer, run against a server test harness that is
 * started on a random port.
 */
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
public class TikaHttpRequestTest extends AbstractHttpRequestTest
{
    @Override
    protected String getTransformerName()
    {
        return "Tika";
    }

    @Override
    protected String getSourceExtension()
    {
        return "pdf";
    }

    /**
     * Overridden because Tika requires a {@code sourceMimetype}: if it is not provided, a
     * sourceMimetype request parameter error is thrown instead of the error under test.
     *
     * @param addFile          passed through unchanged to the inherited implementation
     * @param errorMessage     passed through unchanged to the inherited implementation
     * @param additionalParams extra request parameters, may be {@code null}
     */
    @Override
    protected void assertTransformError(boolean addFile,
                                        String errorMessage,
                                        LinkedMultiValueMap<String, Object> additionalParams)
    {
        super.assertTransformError(addFile, errorMessage, withPdfSourceMimetype(additionalParams));
    }

    /**
     * Builds a new parameter map that starts with the pdf {@code sourceMimetype} and is then
     * followed by the supplied extras, if there are any.
     */
    private static LinkedMultiValueMap<String, Object> withPdfSourceMimetype(
        LinkedMultiValueMap<String, Object> extras)
    {
        final LinkedMultiValueMap<String, Object> combined = new LinkedMultiValueMap<>();
        combined.add("sourceMimetype", "application/pdf");
        if (extras != null)
        {
            combined.addAll(extras);
        }
        return combined;
    }
}

View File

@@ -0,0 +1,577 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_APP_DWG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transform.base.TestFileInfo.testFile;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_AUDIO_MP4;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_EXCEL;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_BMP;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_GIF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_PNG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_KEYNOTE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_NUMBERS;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_PAGES;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_MP3;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_FORMULA;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_GRAPHICS;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_PRESENTATION;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_SPREADSHEET;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_TEXT;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENOFFICE1_WRITER;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_PRESENTATION;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_SPREADSHEET;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PPT;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VIDEO_3GP;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VIDEO_3GP2;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VIDEO_FLV;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VIDEO_MP4;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VIDEO_QUICKTIME;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VISIO;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_VORBIS;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_WORD;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_ZIP;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_RAW_RAF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_RAW_ARW;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_RAW_CR2;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_RAW_RW2;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_RAW_NEF;
import java.util.stream.Stream;
import org.alfresco.transform.base.AbstractMetadataExtractsIT;
import org.alfresco.transform.base.TestFileInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
/**
* Metadata integration tests in the Tika T-Engine.
*
* @author adavis
* @author dedwards
*/
public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT
{
@ParameterizedTest
@MethodSource("engineTransformations")
@Override
public void testTransformation(TestFileInfo testFileInfo)
{
super.testTransformation(testFileInfo);
}
private static Stream<TestFileInfo> engineTransformations()
{
// The following files are the ones tested in the content repository.
// There are many more mimetypes supported by these extractors.
// Where a line has been commented out, the repository code tries to test it but stops because there is
// either no quick file or the target extension has not been registered.
return Stream.of(
//IPTCMetadataExtractor
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quick.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-EXT.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "quickIPTC-multi-creator.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_JPEG, "jpg", "testJPEG_IPTC_EXT.jpg"),
TestFileInfo.testFile(MIMETYPE_IMAGE_GIF, "gif", "quickIPTC.gif"),
TestFileInfo.testFile(MIMETYPE_IMAGE_PNG, "png", "quickIPTC.png"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_ARW, "arw", "20140614_163822_Photogrpahy_Class.ARW"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_CR2, "cr2", "20141227_134519_Palace.CR2"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_RW2, "rw2", "20140629_145035_Flower.RW2"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_NEF, "nef", "20150408_074941_Bush.NEF"),
TestFileInfo.testFile(MIMETYPE_IMAGE_RAW_RAF, "raf", "20160502_190928_London_Underground.RAF"),
// DWGMetadataExtractor
TestFileInfo.testFile(MIMETYPE_APP_DWG, "dwg", "quick2010CustomProps.dwg"),
// MailMetadataExtractor
TestFileInfo.testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),
// MP3MetadataExtractor
TestFileInfo.testFile(MIMETYPE_MP3, "mp3", "quick.mp3"),
// OfficeMetadataExtractor
TestFileInfo.testFile(MIMETYPE_WORD, "doc", "quick.doc"),
//testFile("application/x-tika-msoffice-embedded; format=ole10_native", "", ""),
TestFileInfo.testFile(MIMETYPE_VISIO, "vsd", "quick.vsd"),
//testFile("application/vnd.ms-project", "mpp", ""),
//testFile("application/x-tika-msworks-spreadsheet", "", ""),
//testFile("application/x-mspublisher", "", ""),
TestFileInfo.testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
//testFile("application/x-tika-msoffice", "", ""),
//testFile(MIMETYPE_VISIO_2013, "vsdx", ""),
//testFile("application/sldworks", "", ""),
//testFile(MIMETYPE_ENCRYPTED_OFFICE, "", ""),
TestFileInfo.testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),
// OpenDocumentMetadataExtractor
//testFile("application/x-vnd.oasis.opendocument.presentation", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_CHART, "odc", ""),
//testFile(MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE, "", ""),
//testFile("application/x-vnd.oasis.opendocument.text-web", "", ""),
//testFile("application/x-vnd.oasis.opendocument.image", "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
//testFile(MIMETYPE_OPENDOCUMENT_TEXT_WEB, "oth", ""),
//testFile("application/x-vnd.oasis.opendocument.spreadsheet-template", "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE, "ots", "quick.ots"),
TestFileInfo.testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
//testFile("application/x-vnd.oasis.opendocument.graphics-template", "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_SPREADSHEET, "ods", "quick.ods"),
//testFile("application/x-vnd.oasis.opendocument.chart", "", ""),
//testFile("application/x-vnd.oasis.opendocument.spreadsheet", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_IMAGE, "odi", ""),
//testFile("application/x-vnd.oasis.opendocument.text", "", ""),
//testFile("application/x-vnd.oasis.opendocument.text-template", "", ""),
//testFile("application/vnd.oasis.opendocument.formula-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.formula", "", ""),
//testFile("application/vnd.oasis.opendocument.image-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.image-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.presentation-template", "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE, "otp", "quick.otp"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
//testFile(MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE, "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
//testFile("application/vnd.oasis.opendocument.chart-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.chart-template", "", ""),
//testFile("application/x-vnd.oasis.opendocument.formula-template", "", ""),
//testFile(MIMETYPE_OPENDOCUMENT_DATABASE, "odb", ""),
//testFile("application/x-vnd.oasis.opendocument.text-master", "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_PRESENTATION, "odp", "quick.odp"),
//testFile(MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE, "", ""),
//testFile("application/x-vnd.oasis.opendocument.graphics", "", ""),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
//testFile(MIMETYPE_OPENDOCUMENT_TEXT_MASTER, "odm", ""),
// PdfBoxMetadataExtractor
TestFileInfo.testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
//testFile(MIMETYPE_APPLICATION_ILLUSTRATOR, "ai", ""),
// PoiMetadataExtractor
//testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE_MACRO, "potm", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_ADDIN_MACRO, "xlam", ""),
//testFile(MIMETYPE_OPENXML_WORD_TEMPLATE, "dotx", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_BINARY_MACRO, "xlsb", ""),
TestFileInfo.testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE_MACRO, "sldm", ""),
//testFile("application/vnd.ms-visio.drawing", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO, "ppsm", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_MACRO, "pptm", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDE, "sldx", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_MACRO, "xlsm", ""),
//testFile(MIMETYPE_OPENXML_WORD_TEMPLATE_MACRO, "dotm", ""),
//testFile(MIMETYPE_OPENXML_WORDPROCESSING_MACRO, "docm", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_ADDIN, "ppam", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE, "xltx", ""),
//testFile("application/vnd.ms-xpsdocument", "", ""),
//testFile("application/vnd.ms-visio.drawing.macroenabled.12", "", ""),
//testFile("application/vnd.ms-visio.template.macroenabled.12", "", ""),
//testFile("model/vnd.dwfx+xps", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_TEMPLATE, "potx", ""),
TestFileInfo.testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
TestFileInfo.testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
//testFile("application/vnd.ms-visio.stencil", "", ""),
//testFile("application/vnd.ms-visio.template", "", ""),
//testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW, "ppsx", ""),
//testFile("application/vnd.ms-visio.stencil.macroenabled.12", "", ""),
//testFile(MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO, "xltm", ""),
// TikaAudioMetadataExtractor
TestFileInfo.testFile("video/x-m4v", "m4v", "quick.m4v"),
//testFile("audio/x-oggflac", "", ""),
//testFile("application/mp4", "", ""),
TestFileInfo.testFile(MIMETYPE_VORBIS, "ogg", "quick.ogg"),
TestFileInfo.testFile(MIMETYPE_VIDEO_3GP, "3gp", "quick.3gp"),
//testFile(MIMETYPE_FLAC, "flac", ""),
TestFileInfo.testFile(MIMETYPE_VIDEO_3GP2, "3g2", "quick.3g2"),
TestFileInfo.testFile(MIMETYPE_VIDEO_QUICKTIME, "mov", "quick.mov"),
TestFileInfo.testFile(MIMETYPE_AUDIO_MP4, "m4a", "quick.m4a"),
TestFileInfo.testFile(MIMETYPE_VIDEO_MP4, "mp4", "quick.mp4"),
// TikaAutoMetadataExtractor
// The following <source>_metadata.json files contain null values against author and title.
// This is not new and will be the case in the content repository, but was not tested.
//
// The expected ones are: txt, xml, zip, tar
//
// The unexpected ones are: quick.key, quick.numbers and quick.pages.
//
// quick.bmp, quick.gif, quick.png, quick.3g2, quick.3gp, quick.flv, quick.m4v, quick.mov & quick.mp4
// contain one or more values, but also include nulls. Again this may be correct, a bug or just the
// example quick file rather than a problem with the extractor.
//testFile("application/vnd.ms-htmlhelp", "", ""),
//testFile(MIMETYPE_ATOM, "", ""),
//testFile("audio/midi", "", ""),
//testFile("application/aaigrid", "", ""),
//testFile("application/x-bag", "", ""),
TestFileInfo.testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
//testFile("application/x-quattro-pro; version=9", "", ""),
//testFile("application/x-ibooks+zip", "", ""),
//testFile("audio/wave", "", ""),
//testFile("application/x-midi", "", ""),
TestFileInfo.testFile(MIMETYPE_XML, "xml", "quick.xml"),
//testFile(MIMETYPE_RSS, "rss", ""),
//testFile("application/x-netcdf", "cdf", ""),
//testFile("video/x-daala", "", ""),
//testFile("application/matlab-mat", "", ""),
//testFile("audio/aiff", "", ""),
//testFile("application/jaxa-pal-sar", "", ""),
//testFile("image/x-pcraster", "", ""),
//testFile("image/arg", "", ""),
//testFile("application/x-kro", "", ""),
//testFile("image/x-hdf5-image", "", ""),
//testFile("audio/speex", "", ""),
//testFile("image/big-gif", "", ""),
//testFile("application/zlib", "", ""),
//testFile("application/x-cosar", "", ""),
//testFile("application/x-ntv2", "", ""),
//testFile("application/x-archive", "", ""),
//testFile("application/java-archive", "jar", ""),
//testFile("application/x-vnd.sun.xml.writer", "", ""),
//testFile("application/x-gmt", "", ""),
//testFile("application/x-xml", "", ""),
//testFile("application/gzip-compressed", "", ""),
//testFile("image/ida", "", ""),
//testFile("text/x-groovy", "", ""),
//testFile("image/x-emf", "", ""),
//testFile("application/x-rar", "", ""),
//testFile("image/sar-ceos", "", ""),
//testFile("application/acad", "", ""),
TestFileInfo.testFile(MIMETYPE_ZIP, "zip", "quick.zip"),
//testFile(MIMETYPE_IMAGE_PSD, "psd", ""),
//testFile("application/x-sharedlib", "", ""),
//testFile("audio/x-m4a", "", ""),
//testFile("image/webp", "", ""),
//testFile("application/vnd.wap.xhtml+xml", "", ""),
//testFile("audio/x-aiff", "aiff", ""),
//testFile("application/vnd.ms-spreadsheetml", "", ""),
//testFile("image/x-airsar", "", ""),
//testFile("application/x-pcidsk", "", ""),
//testFile("application/x-java-pack200", "", ""),
//testFile("image/x-fujibas", "", ""),
//testFile("application/x-zmap", "", ""),
//testFile("image/x-bmp", "", ""),
//testFile("image/bpg", "", ""),
//testFile(MIMETYPE_RTF, "rtf", ""),
//testFile("application/x-xz", "", ""),
//testFile("application/x-speex", "", ""),
//testFile("audio/ogg; codecs=speex", "", ""),
//testFile("application/x-l1b", "", ""),
//testFile("application/x-gsbg", "", ""),
//testFile("application/x-sdat", "", ""),
//testFile("application/vnd.ms-visio", "", ""),
//testFile("application/x-coredump", "", ""),
//testFile("application/x-msaccess", "", ""),
//testFile("application/x-dods", "", ""),
TestFileInfo.testFile(MIMETYPE_IMAGE_PNG, "png", "quick.png"),
//testFile("application/vnd.ms-outlook-pst", "", ""),
//testFile("image/bsb", "", ""),
//testFile("application/x-cpio", "cpio", ""),
//testFile("audio/ogg", "oga", ""),
TestFileInfo.testFile("application/x-tar", "tar", "quick.tar"),
//testFile("application/x-dbf", "", ""),
//testFile("video/x-ogm", "", ""),
//testFile("application/x-los-las", "", ""),
//testFile("application/autocad_dwg", "", ""),
//testFile("application/vnd.ms-excel.workspace.3", "", ""),
//testFile("application/vnd.ms-excel.workspace.4", "", ""),
//testFile("image/x-bpg", "", ""),
//testFile("gzip/document", "", ""),
//testFile("text/x-java", "", ""),
//testFile("application/x-brotli", "", ""),
//testFile("application/elas", "", ""),
//testFile("image/x-jb2", "", ""),
//testFile("application/x-cappi", "", ""),
//testFile("application/epub+zip", "", ""),
//testFile("application/x-ace2", "", ""),
//testFile("application/x-sas-data", "", ""),
//testFile("application/x-hdf", "hdf", ""),
//testFile("image/x-mff", "", ""),
//testFile("image/x-srp", "", ""),
TestFileInfo.testFile(MIMETYPE_IMAGE_BMP, "bmp", "quick.bmp"),
//testFile("video/x-ogguvs", "", ""),
//testFile("drawing/dwg", "", ""),
//testFile("application/x-doq2", "", ""),
//testFile("application/x-acad", "", ""),
//testFile("application/x-kml", "", ""),
//testFile("application/x-autocad", "", ""),
//testFile("image/x-mff2", "", ""),
//testFile("application/x-snodas", "", ""),
//testFile("application/terragen", "", ""),
//testFile("application/x-wcs", "", ""),
//testFile("text/x-c++src", "", ""),
//testFile("application/timestamped-data", "", ""),
TestFileInfo.testFile(MIMETYPE_IMAGE_TIFF, "tiff", "quick.tiff"),
//testFile("application/msexcel", "", ""),
//testFile("application/x-asp", "", ""),
//testFile("application/x-rar-compressed", "rar", ""),
//testFile("application/x-envi-hdr", "", ""),
//testFile("text/iso19139+xml", "", ""),
//testFile("application/vnd.ms-tnef", "", ""),
//testFile("application/x-ecrg-toc", "", ""),
//testFile("application/aig", "", ""),
//testFile("audio/x-wav", "wav", ""),
//testFile("image/emf", "", ""),
//testFile("application/x-bzip", "", ""),
//testFile("application/jdem", "", ""),
//testFile("application/x-webp", "", ""),
//testFile("application/x-arj", "", ""),
//testFile("application/x-lzma", "", ""),
//testFile("application/x-java-vm", "", ""),
//testFile("image/envisat", "", ""),
//testFile("application/x-doq1", "", ""),
//testFile("audio/vnd.wave", "", ""),
//testFile("application/x-ppi", "", ""),
//testFile("image/ilwis", "", ""),
//testFile("application/x-gunzip", "", ""),
//testFile("image/x-icon", "", ""),
//testFile("application/ogg", "ogx", ""),
//testFile(MIMETYPE_IMAGE_SVG, "svg", ""),
//testFile("application/x-ms-owner", "", ""),
//testFile("application/x-grib", "", ""),
//testFile("application/ms-tnef", "", ""),
//testFile("image/fits", "", ""),
//testFile("audio/x-mpeg", "", ""),
//testFile("application/x-bzip2", "", ""),
//testFile("text/tsv", "", ""),
//testFile("application/x-fictionbook+xml", "", ""),
//testFile("application/x-p-aux", "", ""),
//testFile("application/x-font-ttf", "", ""),
//testFile("image/x-xcf", "", ""),
//testFile("image/x-ms-bmp", "", ""),
//testFile("image/wmf", "", ""),
//testFile("image/eir", "", ""),
//testFile("application/x-matlab-data", "", ""),
//testFile("application/deflate64", "", ""),
//testFile("audio/wav", "", ""),
//testFile("application/x-rs2", "", ""),
//testFile("application/vnd.ms-word", "", ""),
//testFile("application/x-tsx", "", ""),
//testFile("application/x-lcp", "", ""),
//testFile("application/x-mbtiles", "", ""),
//testFile("audio/x-oggpcm", "", ""),
//testFile("application/x-epsilon", "", ""),
//testFile("application/x-msgn", "", ""),
//testFile(MIMETYPE_TEXT_CSV, "csv", ""),
//testFile("image/x-dimap", "", ""),
//testFile("image/vnd.microsoft.icon", "", ""),
//testFile("application/x-envi", "", ""),
//testFile("application/x-dwg", "", ""),
TestFileInfo.testFile(MIMETYPE_IWORK_NUMBERS, "numbers", "quick.numbers"),
//testFile("application/vnd.ms-word2006ml", "", ""),
//testFile("application/x-bt", "", ""),
//testFile("application/x-font-adobe-metric", "", ""),
//testFile("application/x-rst", "", ""),
//testFile("application/vrt", "", ""),
//testFile("application/x-ctg", "", ""),
//testFile("application/x-e00-grid", "", ""),
//testFile("audio/x-ogg-flac", "", ""),
//testFile("application/x-compress", "z", ""),
//testFile("image/x-psd", "", ""),
//testFile("text/rss", "", ""),
//testFile("application/sdts-raster", "", ""),
//testFile("application/oxps", "", ""),
//testFile("application/leveller", "", ""),
//testFile("application/x-ingr", "", ""),
//testFile("image/sgi", "", ""),
//testFile("application/x-pnm", "", ""),
//testFile("image/raster", "", ""),
//testFile("audio/x-ogg-pcm", "", ""),
//testFile("audio/ogg; codecs=opus", "", ""),
//testFile("application/fits", "", ""),
//testFile("application/x-r", "", ""),
TestFileInfo.testFile(MIMETYPE_IMAGE_GIF, "gif", "quick.gif"),
//testFile("application/java-vm", "", ""),
//testFile("application/mspowerpoint", "", ""),
//testFile("application/x-http", "", ""),
//testFile("application/x-rmf", "", ""),
//testFile("application/x-ogg", "", ""),
//testFile("video/ogg", "ogv", "quick.ogv"),
//testFile(MIMETYPE_APPLEFILE, "", ""),
//testFile("text/rtf", "", ""),
//testFile("image/adrg", "", ""),
//testFile("video/x-ogg-rgb", "", ""),
//testFile("application/x-ngs-geoid", "", ""),
//testFile("application/x-map", "", ""),
//testFile("image/ceos", "", ""),
//testFile("application/xpm", "", ""),
//testFile("application/x-ers", "", ""),
//testFile("video/x-ogg-yuv", "", ""),
//testFile("application/x-isis2", "", ""),
//testFile("application/x-nwt-grd", "", ""),
//testFile("application/x-isis3", "", ""),
//testFile("application/x-nwt-grc", "", ""),
//testFile("video/daala", "", ""),
//testFile("application/x-blx", "", ""),
//testFile("application/x-tnef", "", ""),
//testFile("video/x-dirac", "", ""),
//testFile("application/x-ndf", "", ""),
//testFile("image/vnd.wap.wbmp", "", ""),
//testFile("video/theora", "", ""),
//testFile("application/kate", "", ""),
//testFile("application/pkcs7-mime", "", ""),
//testFile("image/fit", "", ""),
//testFile("application/x-ctable2", "", ""),
//testFile("application/x-executable", "", ""),
//testFile("application/x-isatab", "", ""),
//testFile("application/grass-ascii-grid", "", ""),
TestFileInfo.testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick.txt"),
//testFile("application/gzipped", "", ""),
//testFile("application/x-gxf", "", ""),
//testFile("application/x-cpg", "", ""),
//testFile("application/x-lan", "", ""),
//testFile("application/x-xyz", "", ""),
TestFileInfo.testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
//testFile("image/x-jbig2", "", ""),
//testFile("image/nitf", "", ""),
//testFile("application/mbox", "", ""),
//testFile("application/chm", "", ""),
//testFile("application/x-fast", "", ""),
//testFile("application/x-gsc", "", ""),
//testFile("application/x-deflate", "", ""),
//testFile("application/x-grib2", "", ""),
//testFile("image/x-ozi", "", ""),
//testFile("application/x-pds", "", ""),
//testFile("application/vnd.apple.iwork", "", ""),
//testFile("application/x-usgs-dem", "", ""),
//testFile("application/vnd.ms-excel.sheet.2", "", ""),
//testFile("application/vnd.ms-excel.sheet.3", "", ""),
//testFile("application/dif+xml", "", ""),
//testFile("application/vnd.ms-excel.sheet.4", "", ""),
//testFile("application/x-java", "", ""),
//testFile("image/geotiff", "", ""),
//testFile("application/x-gsag", "", ""),
//testFile("application/x-snappy", "", ""),
//testFile("video/x-theora", "", ""),
//testFile("image/ntf", "", ""),
//testFile("application/x-pdf", "", ""),
//testFile("application/xml", "", ""),
//testFile("application/vnd.wordperfect; version=6.x", "", ""),
//testFile("application/pkcs7-signature", "", ""),
//testFile("application/vnd.wordperfect; version=5.1", "", ""),
//testFile("application/vnd.wordperfect; version=5.0", "", ""),
//testFile("application/x-arj-compressed", "", ""),
//testFile("application/geotopic", "", ""),
//testFile("text/x-java-source", "java", ""),
//testFile("audio/basic", "au", ""),
//testFile("application/pcisdk", "", ""),
//testFile("application/x-rik", "", ""),
//testFile("audio/opus", "", ""),
//testFile(MIMETYPE_IMAGE_JP2, "jp2", ""),
//testFile("application/x-gtx", "", ""),
//testFile("application/x-object", "", ""),
//testFile("application/vnd.ms-wordml", "", ""),
//testFile("image/x-wmf", "", ""),
//testFile("application/x-rpf-toc", "", ""),
//testFile("application/x-srtmhgt", "", ""),
//testFile("application/x-generic-bin", "", ""),
//testFile("text/vnd.iptc.anpa", "", ""),
//testFile("application/x-msmetafile", "", ""),
//testFile("application/x-wms", "", ""),
//testFile("video/x-oggrgb", "", ""),
//testFile("image/xcf", "", ""),
//testFile("application/photoshop", "", ""),
//testFile("application/x-lz4", "", ""),
//testFile("application/x-7z-compressed", "", ""),
//testFile("application/gff", "", ""),
//testFile("video/x-oggyuv", "", ""),
//testFile("application/x-msdownload", "", ""),
//testFile("image/icns", "", ""),
//testFile("application/x-emf", "", ""),
//testFile("application/x-geo-pdf", "", ""),
//testFile("video/x-ogg-uvs", "", ""),
TestFileInfo.testFile(MIMETYPE_VIDEO_FLV, "flv", "quick.flv"),
//testFile("application/x-zip-compressed", "", ""),
//testFile("application/gzip", "", ""),
//testFile("application/x-tika-unix-dump", "", ""),
//testFile("application/x-coasp", "", ""),
//testFile("application/x-dipex", "", ""),
//testFile("application/x-til", "", ""),
//testFile("application/x-gzip", "gzip", ""),
//testFile("application/x-gs7bg", "", ""),
//testFile("application/x-unix-archive", "", ""),
//testFile("application/x-elf", "", ""),
//testFile("application/dted", "", ""),
//testFile("application/x-rasterlite", "", ""),
//testFile("audio/x-mp4a", "", ""),
//testFile("application/x-gzip-compressed", "", ""),
//testFile("application/x-chm", "", ""),
//testFile("image/hfa", "", ""),
// Special test cases from the repo tests
// ======================================
// Test for MNT-577: Alfresco is running 100% CPU for over 10 minutes while extracting metadata for
// Word office document
// testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "problemFootnotes2.docx")
// Test MNT-15219 Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may
// cause OutOfMemory in Tika Note - doesn't use extractFromMimetype
TestFileInfo.testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "dmsu1332-reproduced.xlsx")
);
}
@ParameterizedTest
@MethodSource("tika2_2_1_upgradeFailures")
public void testTika_2_2_1_upgradeFailures(TestFileInfo testFileInfo)
{
super.testTransformation(testFileInfo);
}
private static Stream<TestFileInfo> tika2_2_1_upgradeFailures()
{
// When we upgraded to Tika 2.2.1 from 2.2.0:
// - the original OfficeOpenXMLCore.SUBJECT raw metadata value started being null.
// - the replacement TikaCoreProperties.SUBJECT raw metadata changed into a multi value
// The following test files were the ones that failed.
return Stream.of(
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"),
TestFileInfo.testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"),
TestFileInfo.testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"),
TestFileInfo.testFile(MIMETYPE_PDF, "pdf", "quick.pdf")
);
}
}

View File

@@ -0,0 +1,60 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.base.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Queue transform service integration test for Tika, run on a random port with
 * {@code activemq.url} set to {@code nio://localhost:61616}.
 *
 * @author Lucian Tuca
 * created on 15/01/2019
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
    properties = {"activemq.url=nio://localhost:61616"})
public class TikaQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
    /**
     * Builds a docx to plain text transform request with a random request id and a random
     * source reference.
     *
     * @return the request to send
     */
    @Override
    protected TransformRequest buildRequest()
    {
        // Generated in the same order as they are consumed by the builder below.
        final String requestId = UUID.randomUUID().toString();
        final String sourceReference = UUID.randomUUID().toString();

        return TransformRequest.builder()
            .withRequestId(requestId)
            .withSourceMediaType(MIMETYPE_OPENXML_WORDPROCESSING)
            .withTargetMediaType(MIMETYPE_TEXT_PLAIN)
            .withTargetExtension("txt")
            .withSchema(1)
            .withClientData("ACS")
            .withSourceReference(sourceReference)
            .withSourceSize(32L)
            .build();
    }
}

View File

@@ -0,0 +1,177 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import static java.text.MessageFormat.format;
import static java.util.function.Function.identity;
import static org.alfresco.transform.base.EngineClient.sendTRequest;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
import static org.springframework.http.HttpStatus.OK;
import java.util.Map;
import java.util.stream.Stream;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transform.base.EngineClient;
import org.apache.commons.lang3.tuple.Triple;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
/**
 * Integration test that posts sample files to the engine at {@link #ENGINE_URL} and expects
 * every listed source/target combination to be answered with HTTP status OK.
 *
 * @author Cezar Leahu
 */
public class TikaTransformationIT
{
    private static final Logger logger = LoggerFactory.getLogger(TikaTransformationIT.class);
    private static final String ENGINE_URL = "http://localhost:8090";
    // Target extension -> target mimetype; its keys are the targets generated by allTargets(...)
    private static final Map<String, String> extensionMimetype = ImmutableMap.of(
        "html", "text/html",
        "txt", "text/plain",
        "xhtml", "application/xhtml+xml",
        "xml", "text/xml");

    /**
     * Sends one transform request and asserts that the response status is OK.
     *
     * @param entry triple of (source file name, target extension, source mimetype)
     */
    @ParameterizedTest
    @MethodSource("engineTransformations")
    public void testTransformation(Triple<String, String, String> entry)
    {
        final String sourceFile = entry.getLeft();
        final String sourceMimetype = entry.getRight();
        final String targetExtension = entry.getMiddle();
        String targetMimetype;
        //Single test to cover pdf-->csv
        if (sourceFile.contains("pdf") && targetExtension.contains("csv"))
        {
            // "csv" is not a key of extensionMimetype, so its mimetype is supplied here
            targetMimetype = "text/csv";
        }
        else
        {
            targetMimetype = extensionMimetype.get(targetExtension);
        }
        final String descriptor = format("Transform ({0}, {1} -> {2}, {3})",
            sourceFile, sourceMimetype, targetMimetype, targetExtension);
        try
        {
            final ResponseEntity<Resource> response = EngineClient.sendTRequest(ENGINE_URL, sourceFile, null,
                targetMimetype, targetExtension, ImmutableMap.of(
                    "targetEncoding", "UTF-8",
                    "sourceMimetype", sourceMimetype));
            assertEquals(OK, response.getStatusCode(), descriptor);
        }
        catch (Exception e)
        {
            // Pass the exception as the cause so the original stack trace shows up in the report
            fail(descriptor + " exception: " + e.getMessage(), e);
        }
    }

    /**
     * Expands one source file into a test case per key of {@link #extensionMimetype}.
     *
     * @param sourceFile     name of the sample file
     * @param sourceMimetype mimetype of the sample file
     * @return stream of (source file name, target extension, source mimetype) triples
     */
    private static Stream<Triple<String, String, String>> allTargets(final String sourceFile,
        final String sourceMimetype)
    {
        return extensionMimetype
            .keySet()
            .stream()
            .map(k -> Triple.of(sourceFile, k, sourceMimetype));
    }

    // TODO unit tests for the following file types (for which it is difficult to find sample files):
    // *.ogx (application/ogg)
    // *.cpio (application/x-cpio)
    // *.cdf (application/x-netcdf)
    // *.hdf (application/x-hdf)

    /**
     * Supplies the arguments for {@link #testTransformation(Triple)}.
     *
     * @return stream of (source file name, target extension, source mimetype) triples
     */
    public static Stream<Triple<String, String, String>> engineTransformations()
    {
        return Stream
            .of(
                allTargets("quick.doc", "application/msword"),
                allTargets("quick.docx",
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
                allTargets("quick.html", "text/html"),
                allTargets("quick.jar", "application/java-archive"),
                allTargets("quick.java", "text/x-java-source"),
                Stream.of(
                    Triple.of("quick.key", "html", "application/vnd.apple.keynote"),
                    // Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
                    // NOTE(review): unlike the numbers/pages entries below, this txt case is still active - confirm intended
                    Triple.of("quick.key", "txt", "application/vnd.apple.keynote"),
                    Triple.of("quick.key", "xhtml", "application/vnd.apple.keynote"),
                    Triple.of("quick.key", "xml", "application/vnd.apple.keynote")
                ),
                allTargets("quick.msg", "application/vnd.ms-outlook"),
                Stream.of(
                    Triple.of("quick.numbers", "html", "application/vnd.apple.numbers"),
                    // Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
                    // Triple.of("quick.numbers", "txt", "TikaAuto"),
                    Triple.of("quick.numbers", "xhtml", "application/vnd.apple.numbers"),
                    Triple.of("quick.numbers", "xml", "application/vnd.apple.numbers")
                ),
                Stream.of(
                    Triple.of("quick.pdf", "csv", "application/pdf")
                ),
                allTargets("quick.odp", "application/vnd.oasis.opendocument.presentation"),
                allTargets("quick.ods", "application/vnd.oasis.opendocument.spreadsheet"),
                allTargets("quick.odt", "application/vnd.oasis.opendocument.text"),
                allTargets("quick.otp", "application/vnd.oasis.opendocument.presentation-template"),
                allTargets("quick.ots", "application/vnd.oasis.opendocument.spreadsheet-template"),
                allTargets("quick.ott", "application/vnd.oasis.opendocument.text-template"),
                Stream.of(
                    Triple.of("quick.pages", "html", "application/vnd.apple.pages"),
                    // Does not work, alfresco-docker-sourceMimetype-misc can handle this target mimetype, removed from engine_config.json
                    // Triple.of("quick.pages", "txt", "TikaAuto"),
                    Triple.of("quick.pages", "xhtml", "application/vnd.apple.pages"),
                    Triple.of("quick.pages", "xml", "application/vnd.apple.pages")
                ),
                allTargets("quick.pdf", "application/pdf"),
                allTargets("quick.ppt", "application/vnd.ms-powerpoint"),
                allTargets("quick.pptx",
                    "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
                allTargets("quick.sxw", "application/vnd.sun.xml.writer"),
                allTargets("quick.txt", "text/plain"),
                allTargets("quick.vsd", "application/vnd.visio"),
                allTargets("quick.xls", "application/vnd.ms-excel"),
                // NOTE(review): "xslx" spelling kept as-is; presumably matches the sample file name - verify
                allTargets("quick.xslx",
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
                allTargets("quick.zip", "application/zip"),
                allTargets("quick.tar", "application/x-tar"),
                allTargets("sample.rtf", "application/rtf"),
                allTargets("quick.xml", "text/xml"),
                allTargets("sample.xhtml.txt", "application/xhtml+xml"),
                allTargets("sample.rss", "application/rss+xml"),
                //allTargets("quick.rar", "application/x-rar-compressed"),
                allTargets("quick.z", "application/x-compress"),
                allTargets("quick.csv", "text/csv"),
                allTargets("quick.tar.gz", "application/x-gzip"))
            .flatMap(identity());
    }
}

View File

@@ -0,0 +1,46 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.metadataExtractors;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import org.junit.jupiter.api.Test;
/**
 * Unit test for the date-string conversion offered by {@link IPTCMetadataExtractor}.
 */
public class IPTCMetadataExtractorTest
{
    IPTCMetadataExtractor extractor = new IPTCMetadataExtractor();

    /**
     * Feeds a mix of date spellings to {@code iptcToIso8601DateStrings} and compares the
     * returned array, position by position, with the expected strings.
     */
    @Test
    public void testIptcToIso8601DateStrings() {
        final String[] iptcDates = {
            "1890:01:01",
            "1901:02:01 00:00:00.000Z",
            "1901-02-01 00:00:00.000Z",
            "1901-02-01T00:00:00.000Z",
            "1901:02:01T00:00+00:00",
            "1901:02:01 00:00+00:00"
        };
        final String[] isoDates = {
            "1890-01-01",
            "1901-02-01T00:00:00.000Z",
            "1901-02-01T00:00:00.000Z",
            "1901-02-01T00:00:00.000Z",
            "1901-02-01T00:00+00:00",
            "1901-02-01T00:00+00:00"
        };

        final String[] converted = extractor.iptcToIso8601DateStrings(iptcDates);

        assertArrayEquals(isoDates, converted);
    }
}

View File

@@ -0,0 +1,59 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.parsers;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
/**
 * Unit test for {@link ExifToolParser#findSeparator(String)}.
 */
public class ExifToolParserTest {

    ExifToolParser exifToolParser = new ExifToolParser();

    /**
     * Checks that the value following {@code ExifToolParser.SEPARATOR_SETTING} is returned for
     * a quoted separator, an unquoted separator followed by extra text, and a quoted separator
     * containing spaces and symbols.
     */
    @Test
    public void testFindSeparator() {
        // Quoted separator in the middle of a command line
        final String quotedSeparator = "|||";
        final String quotedCommand = "env FOO=${OUTPUT} exiftool -args -G1 " + ExifToolParser.SEPARATOR_SETTING
            + " \"|||\" ${INPUT}";
        assertEquals(quotedSeparator, exifToolParser.findSeparator(quotedCommand));

        // Unquoted separator followed by further text
        final String bareSeparator = "TESTWITHOUTQUOTES";
        final String bareCommand = "nothing matters until the " + ExifToolParser.SEPARATOR_SETTING + " "
            + bareSeparator + " now all this extra should be ignored";
        assertEquals(bareSeparator, exifToolParser.findSeparator(bareCommand));

        // Quoted separator containing spaces and symbols
        final String oddSeparator = "Test something bonkers 112!£$%^£$^";
        final String oddCommand = ExifToolParser.SEPARATOR_SETTING + " \"" + oddSeparator + "\"";
        assertEquals(oddSeparator, exifToolParser.findSeparator(oddCommand));
    }
}

View File

@@ -0,0 +1,140 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import static org.alfresco.transform.tika.transformers.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transform.tika.transformers.Tika.TARGET_ENCODING;
import static org.alfresco.transform.tika.transformers.Tika.TARGET_MIMETYPE;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.clearInvocations;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.apache.tika.parser.Parser;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Unit test checking which arguments {@link GenericTikaTransformer} passes to its
 * {@code call(...)} method for different {@code notExtractBookmarksText} settings.
 */
@ExtendWith(MockitoExtension.class)
public class GenericTikaTransformerTest
{
    /**
     * Minimal transformer whose {@code notExtractBookmarksTextDefault} is set by the test.
     * Declared static because it never uses the enclosing test instance.
     */
    private static class TikaTestTransformer extends GenericTikaTransformer
    {
        @Override
        protected Parser getParser()
        {
            return null;
        }

        TikaTestTransformer(boolean notExtractBookmarksTextDefault)
        {
            this.notExtractBookmarksTextDefault = notExtractBookmarksTextDefault;
        }
    }

    /**
     * Verifies that NOT_EXTRACT_BOOKMARKS_TEXT is passed to {@code call(...)} when the
     * {@code notExtractBookmarksText} option is "true", is omitted (null) when it is "false",
     * and follows the transformer's default when the option is absent. Also checks that a
     * supplied {@code targetEncoding} option replaces the default encoding.
     */
    @Test
    public void testNotExtractBookmarkTextDefault() throws Exception
    {
        GenericTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true));
        GenericTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false));

        File mockSourceFile = mock(File.class);
        File mockTargetFile = mock(File.class);
        String transformName = "transformName";
        String sourceMimetype = "sourceMimetype";
        String targetMimetype = "targetMimetype";
        String defaultEncoding = "UTF-8";

        // no need to continue execution past here or check values as we're checking the correct params passed to this method later.
        lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any(), any());
        lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any(), any());

        Map<String, String> transformOptions = new HashMap<>();

        // use empty transformOptions to test defaults
        executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
            mockSourceFile, mockTargetFile);
        executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
            mockSourceFile, mockTargetFile);

        // when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT
        verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
            NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);

        // when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT
        verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
            TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);

        // use transforms with notExtractBookmarksText set to true
        clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
        transformOptions.put("notExtractBookmarksText", "true");
        executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
            mockSourceFile, mockTargetFile);
        executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
            mockSourceFile, mockTargetFile);

        // both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
        verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
            NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
        verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
            NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);

        // use transforms with notExtractBookmarksText set to false
        clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
        transformOptions.replace("notExtractBookmarksText", "true", "false");
        executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
        executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);

        // neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT, whatever the default is
        verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
            TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
        verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
            TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);

        // use full set of pdfbox transformOptions just to be safe
        clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
        transformOptions.put("targetEncoding", "anyEncoding");
        executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
        executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);

        // notExtractBookmarksText is still "false", so neither call should have NOT_EXTRACT_BOOKMARKS_TEXT, but the encoding will change
        verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
            TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
        verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
            TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
    }
}

View File

@@ -0,0 +1,92 @@
{
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation" : "Kidlington",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType" : "ARW",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}IptcLastEdited" : "2021:05:27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCtry" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge" : [ "25", "153", "3" ],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorStreetAddress" : "63 Windsor Road",
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "Kidlington",
"{http://purl.org/dc/elements/1.1/}description" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource" : "Fox sources",
"{http://ns.adobe.com/photoshop/1.0/}State" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorRegion" : "Maidenhead",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId" : "id48485",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork" : "01865 513465",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourcefileType" : "ARW",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo" : "4845484",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}CVterm" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID" : "id54154",
"{http://ns.adobe.com/photoshop/1.0/}Category" : "Comedy",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName" : "Alfresco",
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr" : "Kidlington",
"{http://ns.adobe.com/photoshop/1.0/}Headline" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode" : "fodijfsij5454",
"{http://ns.adobe.com/photoshop/1.0/}City" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.1",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion" : "Soth West",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID" : "id548454",
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID" : "id4845151",
"{http://purl.org/dc/elements/1.1/}subject" : "-fox -dog -lazy -jumping",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigImageGUID" : "49848484",
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorExtendedAddress" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The quick brown fox jumps over the lazy dog",
"{http://ns.adobe.com/photoshop/1.0/}DateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID" : "id8454841",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion" : "South West",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1" : "01865 465986",
"{http://ns.adobe.com/photoshop/1.0/}Source" : "Fox sources",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID" : "id659568",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2" : "01865 465986",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion" : "Oxon",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode" : "OX132EN",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorPostalCode" : "OX136XN",
"{http://ns.adobe.com/photoshop/1.0/}Urgency " : "5 (normal urgency)",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "id87515454",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode" : "546851381",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCity" : "Maidenhead",
"{http://purl.org/dc/elements/1.1/}creator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState" : "New York",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId" : "id48485",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCountry" : "England",
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition" : "Left Center",
"{http://ns.adobe.com/photoshop/1.0/}Credit" : "Visa",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre" : "Comedy",
"{http://purl.org/dc/elements/1.1/}rights" : "Fox rights",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth" : "300",
"{http://purl.org/dc/elements/1.1/}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight" : "400",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID" : "id841584",
"{http://ns.adobe.com/photoshop/1.0/}Country" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork" : "01865 513465",
"{http://ns.adobe.com/photoshop/1.0/}Instructions" : "The quick brown fox jumps over the lazy dog",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL" : "www.alfresco.com"
}

View File

@@ -0,0 +1,92 @@
{
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation" : "Kidlington",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType" : "RW2",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}IptcLastEdited" : "2021:05:27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCtry" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge" : [ "25", "153", "3" ],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorStreetAddress" : "63 Windsor Road",
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "Kidlington",
"{http://purl.org/dc/elements/1.1/}description" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource" : "Fox sources",
"{http://ns.adobe.com/photoshop/1.0/}State" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorRegion" : "Maidenhead",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId" : "id48485",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork" : "01865 513465",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourcefileType" : "RW2",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo" : "4845484",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}CVterm" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID" : "id54154",
"{http://ns.adobe.com/photoshop/1.0/}Category" : "Comedy",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName" : "Alfresco",
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr" : "Kidlington",
"{http://ns.adobe.com/photoshop/1.0/}Headline" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode" : "fodijfsij5454",
"{http://ns.adobe.com/photoshop/1.0/}City" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.1",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion" : "Soth West",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID" : "id548454",
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID" : "id4845151",
"{http://purl.org/dc/elements/1.1/}subject" : "-fox -dog -lazy -jumping",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigImageGUID" : "49848484",
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorExtendedAddress" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The quick brown fox jumps over the lazy dog",
"{http://ns.adobe.com/photoshop/1.0/}DateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID" : "id8454841",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion" : "South West",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1" : "01865 465986",
"{http://ns.adobe.com/photoshop/1.0/}Source" : "Fox sources",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID" : "id659568",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2" : "01865 465986",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion" : "Oxon",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode" : "OX132EN",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorPostalCode" : "OX136XN",
"{http://ns.adobe.com/photoshop/1.0/}Urgency " : "5 (normal urgency)",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "id87515454",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode" : "546851381",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCity" : "Maidenhead",
"{http://purl.org/dc/elements/1.1/}creator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState" : "New York",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId" : "id48485",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCountry" : "England",
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition" : "Left Center",
"{http://ns.adobe.com/photoshop/1.0/}Credit" : "Visa",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre" : "Comedy",
"{http://purl.org/dc/elements/1.1/}rights" : "Fox rights",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth" : "300",
"{http://purl.org/dc/elements/1.1/}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight" : "400",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID" : "id841584",
"{http://ns.adobe.com/photoshop/1.0/}Country" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork" : "01865 513465",
"{http://ns.adobe.com/photoshop/1.0/}Instructions" : "The quick brown fox jumps over the lazy dog",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL" : "www.alfresco.com"
}

View File

@@ -0,0 +1,92 @@
{
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation" : "Kidlington",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType" : "CR2",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}IptcLastEdited" : "2021:05:27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCtry" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge" : [ "25", "153", "3" ],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorStreetAddress" : "63 Windsor Road",
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "Kidlington",
"{http://purl.org/dc/elements/1.1/}description" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource" : "Fox sources",
"{http://ns.adobe.com/photoshop/1.0/}State" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorRegion" : "Maidenhead",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId" : "id48485",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork" : "01865 513465",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourcefileType" : "CR2",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo" : "4845484",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}CVterm" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID" : "id54154",
"{http://ns.adobe.com/photoshop/1.0/}Category" : "Comedy",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName" : "Alfresco",
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr" : "Kidlington",
"{http://ns.adobe.com/photoshop/1.0/}Headline" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode" : "fodijfsij5454",
"{http://ns.adobe.com/photoshop/1.0/}City" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.1",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion" : "Soth West",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID" : "id548454",
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID" : "id4845151",
"{http://purl.org/dc/elements/1.1/}subject" : "-fox -dog -lazy -jumping",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigImageGUID" : "49848484",
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorExtendedAddress" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The quick brown fox jumps over the lazy dog",
"{http://ns.adobe.com/photoshop/1.0/}DateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID" : "id8454841",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion" : "South West",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1" : "01865 465986",
"{http://ns.adobe.com/photoshop/1.0/}Source" : "Fox sources",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID" : "id659568",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2" : "01865 465986",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion" : "Oxon",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode" : "OX132EN",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorPostalCode" : "OX136XN",
"{http://ns.adobe.com/photoshop/1.0/}Urgency " : "5 (normal urgency)",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "id87515454",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode" : "546851381",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCity" : "Maidenhead",
"{http://purl.org/dc/elements/1.1/}creator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState" : "New York",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId" : "id48485",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCountry" : "England",
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition" : "Left Center",
"{http://ns.adobe.com/photoshop/1.0/}Credit" : "Visa",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre" : "Comedy",
"{http://purl.org/dc/elements/1.1/}rights" : "Fox rights",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth" : "300",
"{http://purl.org/dc/elements/1.1/}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight" : "400",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID" : "id841584",
"{http://ns.adobe.com/photoshop/1.0/}Country" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork" : "01865 513465",
"{http://ns.adobe.com/photoshop/1.0/}Instructions" : "The quick brown fox jumps over the lazy dog",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL" : "www.alfresco.com"
}

View File

@@ -0,0 +1,92 @@
{
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation" : "Kidlington",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType" : "NEF",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}IptcLastEdited" : "2021:05:27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCtry" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge" : [ "25", "153", "3" ],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorStreetAddress" : "63 Windsor Road",
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "Kidlington",
"{http://purl.org/dc/elements/1.1/}description" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource" : "Fox sources",
"{http://ns.adobe.com/photoshop/1.0/}State" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorRegion" : "Maidenhead",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId" : "id48485",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork" : "01865 513465",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourcefileType" : "NEF",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo" : "4845484",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}CVterm" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID" : "id54154",
"{http://ns.adobe.com/photoshop/1.0/}Category" : "Comedy",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName" : "Alfresco",
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr" : "Kidlington",
"{http://ns.adobe.com/photoshop/1.0/}Headline" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode" : "fodijfsij5454",
"{http://ns.adobe.com/photoshop/1.0/}City" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.1",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion" : "Soth West",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID" : "id548454",
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID" : "id4845151",
"{http://purl.org/dc/elements/1.1/}subject" : "-fox -dog -lazy -jumping",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigImageGUID" : "49848484",
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorExtendedAddress" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The quick brown fox jumps over the lazy dog",
"{http://ns.adobe.com/photoshop/1.0/}DateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID" : "id8454841",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion" : "South West",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1" : "01865 465986",
"{http://ns.adobe.com/photoshop/1.0/}Source" : "Fox sources",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID" : "id659568",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2" : "01865 465986",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion" : "Oxon",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode" : "OX132EN",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorPostalCode" : "OX136XN",
"{http://ns.adobe.com/photoshop/1.0/}Urgency " : "5 (normal urgency)",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "id87515454",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode" : "546851381",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCity" : "Maidenhead",
"{http://purl.org/dc/elements/1.1/}creator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState" : "New York",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId" : "id48485",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCountry" : "England",
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition" : "Left Center",
"{http://ns.adobe.com/photoshop/1.0/}Credit" : "Visa",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre" : "Comedy",
"{http://purl.org/dc/elements/1.1/}rights" : "Fox rights",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth" : "300",
"{http://purl.org/dc/elements/1.1/}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight" : "400",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID" : "id841584",
"{http://ns.adobe.com/photoshop/1.0/}Country" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork" : "01865 513465",
"{http://ns.adobe.com/photoshop/1.0/}Instructions" : "The quick brown fox jumps over the lazy dog",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL" : "www.alfresco.com"
}

View File

@@ -0,0 +1,92 @@
{
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedSublocation" : "Kidlington",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourceType" : "RAF",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}IptcLastEdited" : "2021:05:27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrCtry" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}ModelAge" : [ "25", "153", "3" ],
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedProvinceState" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorStreetAddress" : "63 Windsor Road",
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://ns.adobe.com/photoshop/1.0/}TransmissionReference" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownSublocation" : "Kidlington",
"{http://purl.org/dc/elements/1.1/}description" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSource" : "Fox sources",
"{http://ns.adobe.com/photoshop/1.0/}State" : "New York",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorRegion" : "Maidenhead",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegOrgId" : "id48485",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiTelWork" : "01865 513465",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigitalSourcefileType" : "RAF",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCopyrightNotice" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOSourceInvNo" : "4845484",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorName" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}CVterm" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}CopyrightOwnerID" : "id54154",
"{http://ns.adobe.com/photoshop/1.0/}Category" : "Comedy",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierName" : "Alfresco",
"{http://ns.adobe.com/photoshop/1.0/}SupplementalCategories" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Scene" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrExtadr" : "Kidlington",
"{http://ns.adobe.com/photoshop/1.0/}Headline" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}SubjectCode" : "fodijfsij5454",
"{http://ns.adobe.com/photoshop/1.0/}City" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorEmail" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://ns.useplus.org/ldf/xmp/1.0/}Version" : "1.1",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AddlModelInfo" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownWorldRegion" : "Soth West",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageCreatorID" : "id548454",
"{http://ns.useplus.org/ldf/xmp/1.0/}ModelReleaseID" : "id4845151",
"{http://purl.org/dc/elements/1.1/}subject" : "-fox -dog -lazy -jumping",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}DigImageGUID" : "49848484",
"{http://ns.adobe.com/photoshop/1.0/}CaptionWriter" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorExtendedAddress" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOTitle" : "The quick brown fox jumps over the lazy dog",
"{http://ns.adobe.com/photoshop/1.0/}DateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedCity" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierImageID" : "id8454841",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationCreatedWorldRegion" : "South West",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone1" : "01865 465986",
"{http://ns.adobe.com/photoshop/1.0/}Source" : "Fox sources",
"{http://ns.useplus.org/ldf/xmp/1.0/}ImageSupplierID" : "id659568",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorTelephone2" : "01865 465986",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrRegion" : "Oxon",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiAdrPcode" : "OX132EN",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorPostalCode" : "OX136XN",
"{http://ns.adobe.com/photoshop/1.0/}Urgency " : "5 (normal urgency)",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorID" : "id87515454",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiEmailWork" : "DiegoS@alfrescodev.onmicrosoft.com",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryName" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}OrganisationInImageCode" : "546851381",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AODateCreated" : "2021-05-27",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}PersonInImage" : "Ayman",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCity" : "Maidenhead",
"{http://purl.org/dc/elements/1.1/}creator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownProvinceState" : "New York",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}Event" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}RegItemId" : "id48485",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorCountry" : "England",
"{http://ns.adobe.com/photoshop/1.0/}AuthorsPosition" : "Left Center",
"{http://ns.adobe.com/photoshop/1.0/}Credit" : "Visa",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}IntellectualGenre" : "Comedy",
"{http://purl.org/dc/elements/1.1/}rights" : "Fox rights",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailWidth" : "300",
"{http://purl.org/dc/elements/1.1/}title" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}LocationShownCountryCode" : "+44",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}MaxAvailHeight" : "400",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://ns.adobe.com/xap/1.0/rights/}UsageTerms" : "The quick brown fox jumps over the lazy dog",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}Location" : "Oxford",
"{http://ns.useplus.org/ldf/xmp/1.0/}PropertyReleaseID" : "id841584",
"{http://ns.adobe.com/photoshop/1.0/}Country" : "England",
"{http://iptc.org/std/Iptc4xmpExt/2008-02-29/}AOCreator" : "Ayman",
"{http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/}CiUrlWork" : "01865 513465",
"{http://ns.adobe.com/photoshop/1.0/}Instructions" : "The quick brown fox jumps over the lazy dog",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorName" : "Alfresco",
"{http://ns.useplus.org/ldf/xmp/1.0/}LicensorURL" : "www.alfresco.com"
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}created" : "2016-03-29T21:01:55Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Udintsev, Anton (external - Project)",
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,22 @@
{
"transformOptions": {
"engineXOptions": [
{"value": {"name": "page"}},
{"value": {"name": "width"}},
{"group": {"transformOptions": [
{"value": {"name": "cropGravity"}}
]}}
]
},
"transformers": [
{
"transformerName": "engineX",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
],
"transformOptions": [
"engineXOptions"
]
}
]
}

View File

@@ -0,0 +1,10 @@
{
"transformOptions": {},
"transformers": [
{
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
]
}
]
}

View File

@@ -0,0 +1,10 @@
{
"transformers": [
{
"transformerName": "engineX",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
]
}
]
}

View File

@@ -0,0 +1,26 @@
{
"transformOptions": {
"engineXOptions": [
{"value": {"name": "page"}},
{"value": {"name": "page"}},
{"value": {"name": "width"}},
{"group": {"transformOptions": [
{"value": {"name": "cropGravity"}}
]}}
]
},
"transformers": [
{
"transformerName": "engineX",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" },
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" },
{"sourceMediaType": "application/pdf", "targetMediaType": "image/png" }
],
"transformOptions": [
"engineXOptions",
"engineXOptions"
]
}
]
}

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "8000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo"
}

Binary file not shown.

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "8000",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo"
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}created" : "2011-05-17T13:34:11Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "test file cs5"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1 @@
"The quick brown fox jumps over the lazy dog"
1 The quick brown fox jumps over the lazy dog

Binary file not shown.

View File

@@ -0,0 +1,7 @@
{
"{http://www.alfresco.org/model/content/1.0}modified" : "2005-09-20T17:25:00Z",
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2005-05-26T12:57:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

Binary file not shown.

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}created" : "2010-01-06T17:32:00Z",
"{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

Binary file not shown.

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

View File

@@ -0,0 +1,17 @@
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
<title>The quick brown fox jumps over the lazy dog</title>
<meta name="author" content="Nevin Nollop">
<meta name="keywords" content="Pangram, fox, dog">
<meta name="description" content="Gym class featuring a brown fox and lazy dog">
</head>
<body lang=EN-US>
The quick brown fox jumps over the lazy dog
</body>
</html>

Binary file not shown.

View File

@@ -0,0 +1,31 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/**
 * Empty class: declares no fields, constructors, or methods.
 * NOTE(review): presumably a minimal sample Java source kept as a test fixture — confirm before changing its content.
 */
public class quick
{
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -0,0 +1,6 @@
{
"{http://www.alfresco.org/model/exif/1.0}pixelYDimension" : "92",
"{http://www.alfresco.org/model/exif/1.0}pixelXDimension" : "409",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

View File

@@ -0,0 +1,4 @@
{
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null
}

Binary file not shown.

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "M4A",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

Binary file not shown.

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo"
}

Binary file not shown.

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}description" : null,
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : null,
"{http://www.alfresco.org/model/content/1.0}created" : null,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050",
"{http://www.alfresco.org/model/content/1.0}author" : null,
"{http://www.alfresco.org/model/content/1.0}title" : null,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Mono"
}

Binary file not shown.

View File

@@ -0,0 +1,13 @@
{
"{http://www.alfresco.org/model/audio/1.0}compressor" : "MP3",
"{http://www.alfresco.org/model/audio/1.0}artist" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}genre" : "Foxtrot",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog - About a dog and a fox (Hauskaz)",
"{http://www.alfresco.org/model/audio/1.0}releaseDate" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo",
"{http://www.alfresco.org/model/content/1.0}created" : 1230768000000,
"{http://www.alfresco.org/model/audio/1.0}sampleRate" : "44100",
"{http://www.alfresco.org/model/content/1.0}author" : "Hauskaz",
"{http://www.alfresco.org/model/audio/1.0}album" : "About a dog and a fox",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog"
}

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More