diff --git a/engines/base/src/main/java/org/alfresco/transform/base/TransformManager.java b/engines/base/src/main/java/org/alfresco/transform/base/TransformManager.java
index 66560209..680e3195 100644
--- a/engines/base/src/main/java/org/alfresco/transform/base/TransformManager.java
+++ b/engines/base/src/main/java/org/alfresco/transform/base/TransformManager.java
@@ -38,7 +38,8 @@ public interface TransformManager
{
/**
* Allows a {@link CustomTransformer} to use a local source {@code File} rather than the supplied {@code InputStream}.
- * The file will be deleted once the request is completed.
+ * The file will be deleted once the request is completed. To avoid creating extra files, if the base t-engine has
+ * already created a {@code File}, it is returned.
* If possible this method should be avoided as it is better not to leave content on disk.
* @throws IllegalStateException if this method has already been called.
*/
@@ -46,14 +47,14 @@ public interface TransformManager
/**
* Allows a {@link CustomTransformer} to use a local target {@code File} rather than the supplied {@code OutputStream}.
- * The file will be deleted once the request is completed.
+ * The file will be deleted once the request is completed. To avoid creating extra files, if the base t-engine has
+ * already created a {@code File}, it is returned.
* If possible this method should be avoided as it is better not to leave content on disk.
* @throws IllegalStateException if this method has already been called. A call to {@link #respondWithFragment(Integer)}
* allows the method to be called again.
*/
File createTargetFile();
- // TODO: Do we want to support the following?
/**
* Allows a single transform request to have multiple transform responses. For example images from a video at
* different time offsets or different pages of a document. Following a call to this method a transform response is
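
For illustration only, a minimal sketch of a CustomTransformer that opts into the file-based API described above. The class name, package-less form, and the file-copy body are hypothetical stand-ins; the method names follow the CustomTransformer/TransformManager usage visible elsewhere in this change.

```java
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;

import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;

public class ExampleFileBasedTransformer implements CustomTransformer
{
    @Override
    public String getTransformerName()
    {
        return "exampleFileBased"; // hypothetical name
    }

    @Override
    public void transform(String sourceMimetype, InputStream inputStream, String targetMimetype,
        OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
        throws Exception
    {
        // Returns the File the base t-engine has already created, or creates one now;
        // either way it is deleted once the request completes.
        File sourceFile = transformManager.createSourceFile();
        File targetFile = transformManager.createTargetFile();

        // Stand-in for a transformation that genuinely needs files (e.g. shelling out to a tool).
        Files.copy(sourceFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
    }
}
```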
diff --git a/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java b/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java
index 9ed892bc..7d765f92 100644
--- a/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java
+++ b/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java
@@ -33,7 +33,6 @@ import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;
import org.slf4j.Logger;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -64,19 +63,17 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
*
* The transform results in a Map of extracted properties encoded as json being returned to the content repository.
*
- * - The content repository will use a transform in preference to any metadata extractors it might have defined
- * locally for the same MIMETYPE.
- * - The T-Engine's Controller class will call a method in a class that extends {@link AbstractMetadataExtractor}
- * based on the source and target mediatypes in the normal way.
- * - The method extracts ALL available metadata is extracted from the document and then calls
- * {@link #mapMetadataAndWrite(File, Map, Map)}.
+ * - The method extracts ALL available metadata from the document with
+ * {@link #extractMetadata(String, InputStream, String, OutputStream, Map, TransformManager)} and then calls
+ * {@link #mapMetadataAndWrite(OutputStream, Map, Map)}.
* - Selected values from the available metadata are mapped into content repository property names and values,
* depending on what is defined in a {@code "_metadata_extract.properties"} file.
* - The selected values are set back to the content repository as a JSON representation of a Map, where the values
* are applied to the source node.
*
* To support the same functionality as metadata extractors configured inside the content repository,
- * extra key value pairs may be returned from {@link #extractMetadata}. These are:
+ * extra key value pairs may be returned from {@link #extractMetadata(String, InputStream, String, OutputStream, Map, TransformManager)}.
+ * These are:
*
* - {@code "sys:overwritePolicy"} which can specify the
* {@code org.alfresco.repo.content.metadata.MetadataExtracter.OverwritePolicy} name. Defaults to "PRAGMATIC".
@@ -89,7 +86,8 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
 * If a transform specifies that it can convert from {@code "<MIMETYPE>"} to {@code "alfresco-metadata-embed"}, it is
 * indicating that it can embed metadata in {@code <MIMETYPE>}.
*
- * The transform results in a new version of supplied source file that contains the metadata supplied in the transform
+ * The transform calls {@link #embedMetadata(String, InputStream, String, OutputStream, Map, TransformManager)}
+ * which should result in a new version of the supplied source file that contains the metadata supplied in the transform
* options.
*
* @author Jesper Steen Møller
@@ -162,24 +160,13 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
}
else
{
- extractMetadata(sourceMimetype, inputStream, targetMimetype, outputStream, transformOptions, transformManager);
+ extractMapAndWriteMetadata(sourceMimetype, inputStream, targetMimetype, outputStream, transformOptions, transformManager);
}
}
- public void embedMetadata(String sourceMimetype, InputStream inputStream,
- String targetMimetype, OutputStream outputStream,
- Map<String, String> transformOptions, TransformManager transformManager) throws Exception
- {
- File sourceFile = transformManager.createSourceFile();
- File targetFile = transformManager.createTargetFile();
- embedMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
- }
-
- public void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
- File sourceFile, File targetFile) throws Exception
- {
- // Default nothing, as embedding is not supported in most cases
- }
+ public abstract void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
+ OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
+ throws Exception;
protected Map<String, Serializable> getMetadata(Map<String, String> transformOptions)
{
@@ -507,31 +494,18 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
return true;
}
- public void extractMetadata(String sourceMimetype, InputStream inputStream,
- String targetMimetype, OutputStream outputStream,
- Map<String, String> transformOptions, TransformManager transformManager) throws Exception
+ private void extractMapAndWriteMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
+ OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
+ throws Exception
{
- File sourceFile = transformManager.createSourceFile();
- File targetFile = transformManager.createTargetFile();
- extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
- }
-
- /**
- * The {@code transformOptions} may contain a replacement set of mappings. These will be used in place of the
- * default mappings from read from file if supplied.
- */
- public void extractMetadata(String sourceMimetype, Map<String, String> transformOptions, File sourceFile,
- File targetFile) throws Exception
- {
- Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
-
// Use a ThreadLocal to avoid changing method signatures of methods that currently call getExtractMapping.
+ Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
try
{
extractMapping.set(mapping);
- Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
- mapMetadataAndWrite(targetFile, metadata, mapping);
-
+ Map<String, Serializable> metadata = extractMetadata(sourceMimetype, inputStream, targetMimetype,
+ outputStream, transformOptions, transformManager);
+ mapMetadataAndWrite(outputStream, metadata, mapping);
}
finally
{
@@ -539,8 +513,9 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
}
}
- public abstract Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
- File sourceFile) throws Exception;
+ public abstract Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
+ String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
+ TransformManager transformManager) throws Exception;
private Map<String, Set<String>> getExtractMappingFromOptions(Map<String, String> transformOptions, Map<String, Set<String>> defaultExtractMapping)
@@ -561,17 +536,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
return defaultExtractMapping;
}
- /**
- * @deprecated use {@link #extractMetadata(String, Map, File, File)} rather than calling this method.
- * By default call the overloaded method with the default {@code extractMapping}.
- */
- @Deprecated
- public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata) throws IOException
- {
- mapMetadataAndWrite(targetFile, metadata, defaultExtractMapping);
- }
-
- public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata,
+ public void mapMetadataAndWrite(OutputStream outputStream, Map<String, Serializable> metadata,
Map<String, Set<String>> extractMapping) throws IOException
{
if (logger.isDebugEnabled())
@@ -581,7 +546,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
}
metadata = mapRawToSystem(metadata, extractMapping);
- writeMetadata(targetFile, metadata);
+ writeMetadata(outputStream, metadata);
}
/**
@@ -633,9 +598,9 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
return new TreeMap(systemProperties);
}
- private void writeMetadata(File targetFile, Map<String, Serializable> results)
+ private void writeMetadata(OutputStream outputStream, Map<String, Serializable> results)
throws IOException
{
- jsonObjectMapper.writeValue(targetFile, results);
+ jsonObjectMapper.writeValue(outputStream, results);
}
}
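
For illustration only, a sketch of what a concrete extractor looks like against the new stream-based abstract methods. The class name, the raw key/value, and the Logger-based super constructor (mirroring the existing extractors) are assumptions; the Map type arguments follow the Map<String, Serializable>/Map<String, String> convention these extractors use.

```java
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ExampleMetadataExtractor extends AbstractMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(ExampleMetadataExtractor.class);

    public ExampleMetadataExtractor()
    {
        super(logger); // assumed to match the constructor the existing extractors use
    }

    @Override
    public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
        OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
        throws Exception
    {
        // Only used for extract, so may be empty (as in HtmlMetadataExtractor and RFC822MetadataExtractor).
    }

    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
        String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
        TransformManager transformManager) throws Exception
    {
        Map<String, Serializable> rawProperties = new HashMap<>();
        // Read raw metadata from inputStream and collect it here; the base class maps the raw keys
        // via the *_metadata_extract.properties file and writes the selected values as JSON.
        putRawValue("exampleKey", "exampleValue", rawProperties); // hypothetical raw key/value
        return rawProperties;
    }
}
```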
diff --git a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java
index 075216d2..72670748 100644
--- a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java
+++ b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java
@@ -29,7 +29,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
-import org.alfresco.transform.common.TransformException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -87,8 +86,17 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
}
@Override
- public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
- File sourceFile) throws Exception
+ public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
+ OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
+ throws Exception
+ {
+ // Only used for extract, so may be empty.
+ }
+
+ @Override
+ public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
+ String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
+ TransformManager transformManager) throws Exception
{
final Map<String, Serializable> rawProperties = new HashMap<>();
@@ -175,10 +183,10 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
rawProperties.clear();
Reader r = null;
- try (InputStream cis = new FileInputStream(sourceFile))
+ try
{
// TODO: for now, use default charset; we should attempt to map from html meta-data
- r = new InputStreamReader(cis, charsetGuess);
+ r = new InputStreamReader(inputStream, charsetGuess);
HTMLEditorKit.Parser parser = new ParserDelegator();
parser.parse(r, callback, tries > 0);
break;
diff --git a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java
index 3930dd85..7b228c6c 100644
--- a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java
+++ b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java
@@ -29,7 +29,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
-import org.alfresco.transform.common.TransformException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -89,102 +88,108 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractor implement
}
@Override
- public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
- File sourceFile) throws Exception
+ public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
+ OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
+ throws Exception
+ {
+ // Only used for extract, so may be empty.
+ }
+
+ @Override
+ public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
+ String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
+ TransformManager transformManager) throws Exception
{
final Map<String, Serializable> rawProperties = new HashMap<>();
- try (InputStream is = new FileInputStream(sourceFile))
+ MimeMessage mimeMessage = new MimeMessage(null, inputStream);
+
+ if (mimeMessage != null)
{
- MimeMessage mimeMessage = new MimeMessage(null, is);
+ /**
+ * Extract RFC822 values that doesn't match to headers and need to be encoded.
+ * Or those special fields that require some code to extract data
+ */
+ String tmp = InternetAddress.toString(mimeMessage.getFrom());
+ tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
+ putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
- if (mimeMessage != null)
+ tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
+ tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
+ putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
+
+ tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
+ tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
+ putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
+
+ putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
+
+ /**
+ * Received field from RFC 822
+ *
+ * "Received" ":" ; one per relay
+ * ["from" domain] ; sending host
+ * ["by" domain] ; receiving host
+ * ["via" atom] ; physical path
+ * ("with" atom) ; link/mail protocol
+ * ["id" msg-id] ; receiver msg id
+ * ["for" addr-spec] ; initial form
+ * ";" date-time ; time received
+ */
+ Date rxDate = mimeMessage.getReceivedDate();
+
+ if(rxDate != null)
{
- /**
- * Extract RFC822 values that doesn't match to headers and need to be encoded.
- * Or those special fields that require some code to extract data
- */
- String tmp = InternetAddress.toString(mimeMessage.getFrom());
- tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
- putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
-
- tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
- tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
- putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
-
- tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
- tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
- putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
-
- putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
-
- /**
- * Received field from RFC 822
- *
- * "Received" ":" ; one per relay
- * ["from" domain] ; sending host
- * ["by" domain] ; receiving host
- * ["via" atom] ; physical path
- * ("with" atom) ; link/mail protocol
- * ["id" msg-id] ; receiver msg id
- * ["for" addr-spec] ; initial form
- * ";" date-time ; time received
- */
- Date rxDate = mimeMessage.getReceivedDate();
-
- if(rxDate != null)
+ // The email implementation extracted the received date for us.
+ putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
+ }
+ else
+ {
+ // the email implementation did not parse the received date for us.
+ String[] rx = mimeMessage.getHeader("received");
+ if(rx != null && rx.length > 0)
{
- // The email implementation extracted the received date for us.
- putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
- }
- else
- {
- // the email implementation did not parse the received date for us.
- String[] rx = mimeMessage.getHeader("received");
- if(rx != null && rx.length > 0)
+ String lastReceived = rx[0];
+ lastReceived = MimeUtility.unfold(lastReceived);
+ int x = lastReceived.lastIndexOf(';');
+ if(x > 0)
{
- String lastReceived = rx[0];
- lastReceived = MimeUtility.unfold(lastReceived);
- int x = lastReceived.lastIndexOf(';');
- if(x > 0)
- {
- String dateStr = lastReceived.substring(x + 1).trim();
- putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
- }
+ String dateStr = lastReceived.substring(x + 1).trim();
+ putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
}
}
+ }
- String[] subj = mimeMessage.getHeader("Subject");
- if (subj != null && subj.length > 0)
+ String[] subj = mimeMessage.getHeader("Subject");
+ if (subj != null && subj.length > 0)
+ {
+ String decodedSubject = subj[0];
+ try
{
- String decodedSubject = subj[0];
- try
- {
- decodedSubject = MimeUtility.decodeText(decodedSubject);
- }
- catch (UnsupportedEncodingException e)
- {
- logger.warn(e.toString());
- }
- putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
+ decodedSubject = MimeUtility.decodeText(decodedSubject);
}
-
- /*
- * Extract values from all header fields, including extension fields "X-"
- */
- Set<String> keys = getExtractMapping().keySet();
- @SuppressWarnings("unchecked")
- Enumeration<Header> headers = mimeMessage.getAllHeaders();
- while (headers.hasMoreElements())
+ catch (UnsupportedEncodingException e)
{
- Header header = (Header) headers.nextElement();
- if (keys.contains(header.getName()))
- {
- tmp = header.getValue();
- tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
+ logger.warn(e.toString());
+ }
+ putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
+ }
- putRawValue(header.getName(), tmp, rawProperties);
- }
+ /*
+ * Extract values from all header fields, including extension fields "X-"
+ */
+ Set<String> keys = getExtractMapping().keySet();
+ @SuppressWarnings("unchecked")
+ Enumeration<Header> headers = mimeMessage.getAllHeaders();
+ while (headers.hasMoreElements())
+ {
+ Header header = (Header) headers.nextElement();
+ if (keys.contains(header.getName()))
+ {
+ tmp = header.getValue();
+ tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
+
+ putRawValue(header.getName(), tmp, rawProperties);
}
}
}
diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java
index ef293cac..3a9ae1cf 100644
--- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java
+++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java
@@ -51,7 +51,6 @@ import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
-import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
@@ -215,95 +214,93 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
}
@Override
- public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
- File sourceFile) throws Exception
+ public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
+ String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
+ TransformManager transformManager) throws Exception
{
Map<String, Serializable> rawProperties = new HashMap<>();
- try (InputStream is = new FileInputStream(sourceFile))
+ Parser parser = getParser();
+
+ Metadata metadata = new Metadata();
+ metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
+
+ ParseContext context = buildParseContext(metadata, sourceMimetype);
+
+ ContentHandler handler;
+ Map<String, String> headers = null;
+ if (needHeaderContents())
{
- Parser parser = getParser();
-
- Metadata metadata = new Metadata();
- metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
-
- ParseContext context = buildParseContext(metadata, sourceMimetype);
-
- ContentHandler handler;
- Map<String, String> headers = null;
- if (needHeaderContents())
- {
- MapCaptureContentHandler headerCapture =
- new MapCaptureContentHandler();
- headers = headerCapture.tags;
- handler = new HeadContentHandler(headerCapture);
- }
- else
- {
- handler = new NullContentHandler();
- }
-
- parser.parse(is, handler, metadata, context);
-
- // First up, copy all the Tika metadata over
- // This allows people to map any of the Tika
- // keys onto their own content model
- for (String tikaKey : metadata.names())
- {
- // TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
- putRawValue(tikaKey, getMetadataValue(metadata, Property.internalText(tikaKey)), rawProperties);
- }
-
- // Now, map the common Tika metadata keys onto
- // the common Alfresco metadata keys. This allows
- // existing mapping properties files to continue
- // to work without needing any changes
-
- // The simple ones
- putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties);
- putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties);
- putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties);
-
- // Tags
- putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
-
- // Get the subject and description, despite things not
- // being nearly as consistent as one might hope
- String subject = getMetadataValue(metadata, TikaCoreProperties.SUBJECT);
- String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION);
- if (subject != null && description != null)
- {
- putRawValue(KEY_DESCRIPTION, description, rawProperties);
- putRawValue(KEY_SUBJECT, subject, rawProperties);
- }
- else if (subject != null)
- {
- putRawValue(KEY_DESCRIPTION, subject, rawProperties);
- putRawValue(KEY_SUBJECT, subject, rawProperties);
- }
- else if (description != null)
- {
- putRawValue(KEY_DESCRIPTION, description, rawProperties);
- putRawValue(KEY_SUBJECT, description, rawProperties);
- }
-
- // Try for the dates two different ways too
- if (metadata.get(TikaCoreProperties.CREATED) != null)
- {
- putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties);
- }
- else if (metadata.get(TikaCoreProperties.MODIFIED) != null)
- {
- putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties);
- }
-
- // If people created a specific instance
- // (eg OfficeMetadataExtractor), then allow that
- // instance to map the Tika keys onto its
- // existing namespace so that older properties
- // files continue to map correctly
- rawProperties = extractSpecific(metadata, rawProperties, headers);
+ MapCaptureContentHandler headerCapture =
+ new MapCaptureContentHandler();
+ headers = headerCapture.tags;
+ handler = new HeadContentHandler(headerCapture);
}
+ else
+ {
+ handler = new NullContentHandler();
+ }
+
+ parser.parse(inputStream, handler, metadata, context);
+
+ // First up, copy all the Tika metadata over
+ // This allows people to map any of the Tika
+ // keys onto their own content model
+ for (String tikaKey : metadata.names())
+ {
+ // TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
+ putRawValue(tikaKey, getMetadataValue(metadata, Property.internalText(tikaKey)), rawProperties);
+ }
+
+ // Now, map the common Tika metadata keys onto
+ // the common Alfresco metadata keys. This allows
+ // existing mapping properties files to continue
+ // to work without needing any changes
+
+ // The simple ones
+ putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties);
+ putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties);
+ putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties);
+
+ // Tags
+ putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
+
+ // Get the subject and description, despite things not
+ // being nearly as consistent as one might hope
+ String subject = getMetadataValue(metadata, TikaCoreProperties.SUBJECT);
+ String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION);
+ if (subject != null && description != null)
+ {
+ putRawValue(KEY_DESCRIPTION, description, rawProperties);
+ putRawValue(KEY_SUBJECT, subject, rawProperties);
+ }
+ else if (subject != null)
+ {
+ putRawValue(KEY_DESCRIPTION, subject, rawProperties);
+ putRawValue(KEY_SUBJECT, subject, rawProperties);
+ }
+ else if (description != null)
+ {
+ putRawValue(KEY_DESCRIPTION, description, rawProperties);
+ putRawValue(KEY_SUBJECT, description, rawProperties);
+ }
+
+ // Try for the dates two different ways too
+ if (metadata.get(TikaCoreProperties.CREATED) != null)
+ {
+ putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties);
+ }
+ else if (metadata.get(TikaCoreProperties.MODIFIED) != null)
+ {
+ putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties);
+ }
+
+ // If people created a specific instance
+ // (eg OfficeMetadataExtractor), then allow that
+ // instance to map the Tika keys onto its
+ // existing namespace so that older properties
+ // files continue to map correctly
+ rawProperties = extractSpecific(metadata, rawProperties, headers);
return rawProperties;
}
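
For illustration only, with the stream-based extractMetadata now implemented once in AbstractTikaMetadataExtractor, a concrete Tika extractor mainly has to supply a Parser. This sketch assumes the protected getParser() hook and a Logger-based constructor matching the existing Tika extractors; AutoDetectParser is just a stand-in for a format-specific parser.

```java
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor;

public class ExampleTikaMetadataExtractor extends AbstractTikaMetadataExtractor
{
    private static final Logger logger = LoggerFactory.getLogger(ExampleTikaMetadataExtractor.class);

    public ExampleTikaMetadataExtractor()
    {
        super(logger); // assumed to match the constructor the existing Tika extractors use
    }

    @Override
    protected Parser getParser()
    {
        // Stand-in: the concrete extractors in this engine return format-specific parsers.
        return new AutoDetectParser();
    }
}
```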
diff --git a/models/README.md b/model/iptc/README.md
similarity index 100%
rename from models/README.md
rename to model/iptc/README.md
diff --git a/models/iptc-model-context.xml b/model/iptc/iptc-model-context.xml
similarity index 100%
rename from models/iptc-model-context.xml
rename to model/iptc/iptc-model-context.xml
diff --git a/models/iptc/iptc-model.properties b/model/iptc/iptc-model.properties
similarity index 100%
rename from models/iptc/iptc-model.properties
rename to model/iptc/iptc-model.properties
diff --git a/models/iptc/iptc-model_cs.properties b/model/iptc/iptc-model_cs.properties
similarity index 100%
rename from models/iptc/iptc-model_cs.properties
rename to model/iptc/iptc-model_cs.properties
diff --git a/models/iptc/iptc-model_da.properties b/model/iptc/iptc-model_da.properties
similarity index 100%
rename from models/iptc/iptc-model_da.properties
rename to model/iptc/iptc-model_da.properties
diff --git a/models/iptc/iptc-model_de.properties b/model/iptc/iptc-model_de.properties
similarity index 100%
rename from models/iptc/iptc-model_de.properties
rename to model/iptc/iptc-model_de.properties
diff --git a/models/iptc/iptc-model_es.properties b/model/iptc/iptc-model_es.properties
similarity index 100%
rename from models/iptc/iptc-model_es.properties
rename to model/iptc/iptc-model_es.properties
diff --git a/models/iptc/iptc-model_fi.properties b/model/iptc/iptc-model_fi.properties
similarity index 100%
rename from models/iptc/iptc-model_fi.properties
rename to model/iptc/iptc-model_fi.properties
diff --git a/models/iptc/iptc-model_fr.properties b/model/iptc/iptc-model_fr.properties
similarity index 100%
rename from models/iptc/iptc-model_fr.properties
rename to model/iptc/iptc-model_fr.properties
diff --git a/models/iptc/iptc-model_it.properties b/model/iptc/iptc-model_it.properties
similarity index 100%
rename from models/iptc/iptc-model_it.properties
rename to model/iptc/iptc-model_it.properties
diff --git a/models/iptc/iptc-model_ja.properties b/model/iptc/iptc-model_ja.properties
similarity index 100%
rename from models/iptc/iptc-model_ja.properties
rename to model/iptc/iptc-model_ja.properties
diff --git a/models/iptc/iptc-model_nb.properties b/model/iptc/iptc-model_nb.properties
similarity index 100%
rename from models/iptc/iptc-model_nb.properties
rename to model/iptc/iptc-model_nb.properties
diff --git a/models/iptc/iptc-model_nl.properties b/model/iptc/iptc-model_nl.properties
similarity index 100%
rename from models/iptc/iptc-model_nl.properties
rename to model/iptc/iptc-model_nl.properties
diff --git a/models/iptc/iptc-model_pl.properties b/model/iptc/iptc-model_pl.properties
similarity index 100%
rename from models/iptc/iptc-model_pl.properties
rename to model/iptc/iptc-model_pl.properties
diff --git a/models/iptc/iptc-model_pt_BR.properties b/model/iptc/iptc-model_pt_BR.properties
similarity index 100%
rename from models/iptc/iptc-model_pt_BR.properties
rename to model/iptc/iptc-model_pt_BR.properties
diff --git a/models/iptc/iptc-model_ru.properties b/model/iptc/iptc-model_ru.properties
similarity index 100%
rename from models/iptc/iptc-model_ru.properties
rename to model/iptc/iptc-model_ru.properties
diff --git a/models/iptc/iptc-model_sv.properties b/model/iptc/iptc-model_sv.properties
similarity index 100%
rename from models/iptc/iptc-model_sv.properties
rename to model/iptc/iptc-model_sv.properties
diff --git a/models/iptc/iptc-model_zh_CN.properties b/model/iptc/iptc-model_zh_CN.properties
similarity index 100%
rename from models/iptc/iptc-model_zh_CN.properties
rename to model/iptc/iptc-model_zh_CN.properties
diff --git a/models/iptc/iptcModel.xml b/model/iptc/iptcModel.xml
similarity index 100%
rename from models/iptc/iptcModel.xml
rename to model/iptc/iptcModel.xml