Save point: [skip ci]

* Moved iptc reference info to model rather than having a separate models directory
* Use InputStream and OutputStream in metadata extractors rather than Files.
2025-10-01 14:41:17 +00:00 · 2022-07-12 10:18:15 +01:00
parent 2e17c3ec53
commit 6bac1c4287
24 changed files with 211 additions and 235 deletions
--- a/engines/base/src/main/java/org/alfresco/transform/base/TransformManager.java
+++ b/engines/base/src/main/java/org/alfresco/transform/base/TransformManager.java
@@ -38,7 +38,8 @@ public interface TransformManager
 {
    /**
     * Allows a {@link CustomTransformer} to use a local source {@code File} rather than the supplied {@code InputStream}.
-     * The file will be deleted once the request is completed.
+     * The file will be deleted once the request is completed. To avoid creating extra files, if a File has already
     * been created by the base t-engine, it is returned.
     * If possible this method should be avoided as it is better not to leave content on disk.
     * @throws IllegalStateException if this method has already been called.
     */
@@ -46,14 +47,14 @@ public interface TransformManager
    /**
     * Allows a {@link CustomTransformer} to use a local target {@code File} rather than the supplied {@code OutputStream}.
-     * The file will be deleted once the request is completed.
+     * The file will be deleted once the request is completed. To avoid creating extra files, if a File has already
     * been created by the base t-engine, it is returned.
     * If possible this method should be avoided as it is better not to leave content on disk.
     * @throws IllegalStateException if this method has already been called. A call to {@link #respondWithFragment(Integer)}
     *         allows the method to be called again.
     */
    File createTargetFile();
    // TODO: Do we want to support the following?
    /**
     * Allows a single transform request to have multiple transform responses. For example images from a video at
     * different time offsets or different pages of a document. Following a call to this method a transform response is
--- a/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java
+++ b/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java
@@ -33,7 +33,6 @@ import org.alfresco.transform.base.CustomTransformer;
 import org.alfresco.transform.base.TransformManager;
 import org.slf4j.Logger;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
@@ -64,19 +63,17 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
 *
 * The transform results in a Map of extracted properties encoded as json being returned to the content repository.
 * <ul>
- *   <li>The content repository will use a transform in preference to any metadata extractors it might have defined
+ *   <li>The method extracts ALL available metadata from the document with
- *   locally for the same MIMETYPE.</li>
+ *   {@link #extractMetadata(String, InputStream, String, OutputStream, Map, TransformManager)} and then calls
- *   <li>The T-Engine's Controller class will call a method in a class that extends {@link AbstractMetadataExtractor}
+ *   {@link #mapMetadataAndWrite(OutputStream, Map, Map)}.</li>
 *   based on the source and target mediatypes in the normal way.</li>
 *   <li>The method extracts ALL available metadata is extracted from the document and then calls
 *   {@link #mapMetadataAndWrite(File, Map, Map)}.</li>
 *   <li>Selected values from the available metadata are mapped into content repository property names and values,
 *   depending on what is defined in a {@code "<classname>_metadata_extract.properties"} file.</li>
 *   <li>The selected values are set back to the content repository as a JSON representation of a Map, where the values
 *   are applied to the source node.</li>
 * </ul>
 * To support the same functionality as metadata extractors configured inside the content repository,
- * extra key value pairs may be returned from {@link #extractMetadata}. These are:
+ * extra key value pairs may be returned from {@link #extractMetadata(String, InputStream, String, OutputStream, Map, TransformManager)}.
 * These are:
 * <ul>
 *     <li>{@code "sys:overwritePolicy"} which can specify the
 *     {@code org.alfresco.repo.content.metadata.MetadataExtracter.OverwritePolicy} name. Defaults to "PRAGMATIC".</li>
@@ -89,7 +86,8 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
 * If a transform specifies that it can convert from {@code "<MIMETYPE>"} to {@code "alfresco-metadata-embed"}, it is
 * indicating that it can embed metadata in {@code <MIMETYPE>}.
 *
- * The transform results in a new version of supplied source file that contains the metadata supplied in the transform
+ * The transform calls {@link #embedMetadata(String, InputStream, String, OutputStream, Map, TransformManager)}
 * which should result in a new version of the supplied source file that contains the metadata supplied in the transform
 * options.
 * 
 * @author Jesper Steen Møller
@@ -162,24 +160,13 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
        }
        else
        {
-            extractMetadata(sourceMimetype, inputStream, targetMimetype, outputStream, transformOptions, transformManager);
+            extractMapAndWriteMetadata(sourceMimetype, inputStream, targetMimetype, outputStream, transformOptions, transformManager);
        }
    }
-    public void embedMetadata(String sourceMimetype, InputStream inputStream,
+    public abstract void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
-            String targetMimetype, OutputStream outputStream,
+            OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
-            Map<String, String> transformOptions, TransformManager transformManager) throws Exception
+            throws Exception;
    {
        File sourceFile = transformManager.createSourceFile();
        File targetFile = transformManager.createTargetFile();
        embedMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }
    public void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
                              File sourceFile, File targetFile) throws Exception
    {
        // Default nothing, as embedding is not supported in most cases
    }
    protected Map<String, Serializable> getMetadata(Map<String, String> transformOptions)
    {
@@ -507,31 +494,18 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
        return true;
    }
-    public void extractMetadata(String sourceMimetype, InputStream inputStream,
+    private void extractMapAndWriteMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
-            String targetMimetype, OutputStream outputStream,
+            OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
-            Map<String, String> transformOptions, TransformManager transformManager) throws Exception
+            throws Exception
    {
        File sourceFile = transformManager.createSourceFile();
        File targetFile = transformManager.createTargetFile();
        extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
    }
    /**
     * The {@code transformOptions} may contain a replacement set of mappings. These will be used in place of the
     * default mappings read from file if supplied.
     */
    public void extractMetadata(String sourceMimetype, Map<String, String> transformOptions, File sourceFile,
                                File targetFile) throws Exception
    {
        Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
        // Use a ThreadLocal to avoid changing method signatures of methods that currently call getExtractMapping.
        Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
        try
        {
            extractMapping.set(mapping);
-            Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
+            Map<String, Serializable> metadata = extractMetadata(sourceMimetype, inputStream, targetMimetype,
-            mapMetadataAndWrite(targetFile, metadata, mapping);
+                    outputStream, transformOptions, transformManager);
-
+            mapMetadataAndWrite(outputStream, metadata, mapping);
        }
        finally
        {
@@ -539,8 +513,9 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
        }
    }
-    public abstract Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
+    public abstract Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
-            File sourceFile) throws Exception;
+            String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
            TransformManager transformManager) throws Exception;
    private Map<String, Set<String>> getExtractMappingFromOptions(Map<String, String> transformOptions, Map<String,
            Set<String>> defaultExtractMapping)
@@ -561,17 +536,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
        return defaultExtractMapping;
    }
-    /**
+    public void mapMetadataAndWrite(OutputStream outputStream, Map<String, Serializable> metadata,
     * @deprecated use {@link #extractMetadata(String, Map, File, File)} rather than calling this method.
     * By default call the overloaded method with the default {@code extractMapping}.
     */
    @Deprecated
    public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata) throws IOException
    {
        mapMetadataAndWrite(targetFile, metadata, defaultExtractMapping);
    }
    public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata,
                                    Map<String, Set<String>> extractMapping) throws IOException
    {
        if (logger.isDebugEnabled())
@@ -581,7 +546,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
        }
        metadata = mapRawToSystem(metadata, extractMapping);
-        writeMetadata(targetFile, metadata);
+        writeMetadata(outputStream, metadata);
    }
    /**
@@ -633,9 +598,9 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
        return new TreeMap<String, Serializable>(systemProperties);
    }
-    private void writeMetadata(File targetFile, Map<String, Serializable> results)
+    private void writeMetadata(OutputStream outputStream, Map<String, Serializable> results)
            throws IOException
    {
-        jsonObjectMapper.writeValue(targetFile, results);
+        jsonObjectMapper.writeValue(outputStream, results);
    }
 }
--- a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java
+++ b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java
@@ -29,7 +29,6 @@ package org.alfresco.transform.misc.metadataExtractors;
 import org.alfresco.transform.base.CustomTransformer;
 import org.alfresco.transform.base.TransformManager;
 import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
 import org.alfresco.transform.common.TransformException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -87,8 +86,17 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
    }
    @Override
-    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
+    public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
-                                                     File sourceFile) throws Exception
+            OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
            throws Exception
    {
        // Only used for extract, so may be empty.
    }
    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
            String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
            TransformManager transformManager) throws Exception
    {
        final Map<String, Serializable> rawProperties = new HashMap<>();
@@ -175,10 +183,10 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
            rawProperties.clear();
            Reader r = null;
-            try (InputStream cis = new FileInputStream(sourceFile))
+            try
            {
                // TODO: for now, use default charset; we should attempt to map from html meta-data
-                r = new InputStreamReader(cis, charsetGuess);
+                r = new InputStreamReader(inputStream, charsetGuess);
                HTMLEditorKit.Parser parser = new ParserDelegator();
                parser.parse(r, callback, tries > 0);
                break;
--- a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java
+++ b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java
@@ -29,7 +29,6 @@ package org.alfresco.transform.misc.metadataExtractors;
 import org.alfresco.transform.base.CustomTransformer;
 import org.alfresco.transform.base.TransformManager;
 import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
 import org.alfresco.transform.common.TransformException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -89,102 +88,108 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractor implement
    }
    @Override
-    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
+    public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
-                                                     File sourceFile) throws Exception
+            OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
            throws Exception
    {
        // Only used for extract, so may be empty.
    }
    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
            String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
            TransformManager transformManager) throws Exception
    {
        final Map<String, Serializable> rawProperties = new HashMap<>();
-        try (InputStream is = new FileInputStream(sourceFile))
+        MimeMessage mimeMessage = new MimeMessage(null, inputStream);
        if (mimeMessage != null)
        {
-            MimeMessage mimeMessage = new MimeMessage(null, is);
+            /**
             * Extract RFC822 values that don't match headers and need to be encoded.
             * Or those special fields that require some code to extract data
             */
            String tmp = InternetAddress.toString(mimeMessage.getFrom());
            tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
            putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
-            if (mimeMessage != null)
+            tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
            tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
            putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
            tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
            tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
            putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
            putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
            /**
             * Received field from RFC 822
             *
             * "Received"    ":"        ; one per relay
             *   ["from" domain]        ; sending host
             *   ["by"   domain]        ; receiving host
             *   ["via"  atom]          ; physical path
             *  ("with" atom)           ; link/mail protocol
             *   ["id"   msg-id]        ; receiver msg id
             *   ["for"  addr-spec]     ; initial form
             * ";"    date-time         ; time received
             */
            Date rxDate = mimeMessage.getReceivedDate();
            if(rxDate != null)
            {
-                /**
+                // The email implementation extracted the received date for us.
-                 * Extract RFC822 values that doesn't match to headers and need to be encoded.
+                putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
-                 * Or those special fields that require some code to extract data
+            }
-                 */
+            else
-                String tmp = InternetAddress.toString(mimeMessage.getFrom());
+            {
-                tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
+                // the email implementation did not parse the received date for us.
-                putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
+                String[] rx = mimeMessage.getHeader("received");
-
+                if(rx != null && rx.length > 0)
                tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
                tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
                putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
                tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
                tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
                putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
                putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
                /**
                 * Received field from RFC 822
                 *
                 * "Received"    ":"        ; one per relay
                 *   ["from" domain]        ; sending host
                 *   ["by"   domain]        ; receiving host
                 *   ["via"  atom]          ; physical path
                 *  ("with" atom)           ; link/mail protocol
                 *   ["id"   msg-id]        ; receiver msg id
                 *   ["for"  addr-spec]     ; initial form
                 * ";"    date-time         ; time received
                 */
                Date rxDate = mimeMessage.getReceivedDate();
                if(rxDate != null)
                {
-                    // The email implementation extracted the received date for us.
+                    String lastReceived = rx[0];
-                    putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
+                    lastReceived = MimeUtility.unfold(lastReceived);
-                }
+                    int x = lastReceived.lastIndexOf(';');
-                else
+                    if(x > 0)
                {
                    // the email implementation did not parse the received date for us.
                    String[] rx = mimeMessage.getHeader("received");
                    if(rx != null && rx.length > 0)
                    {
-                        String lastReceived = rx[0];
+                        String dateStr = lastReceived.substring(x + 1).trim();
-                        lastReceived = MimeUtility.unfold(lastReceived);
+                        putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
                        int x = lastReceived.lastIndexOf(';');
                        if(x > 0)
                        {
                            String dateStr = lastReceived.substring(x + 1).trim();
                            putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
                        }
                    }
                }
            }
-                String[] subj = mimeMessage.getHeader("Subject");
+            String[] subj = mimeMessage.getHeader("Subject");
-                if (subj != null && subj.length > 0)
+            if (subj != null && subj.length > 0)
            {
                String decodedSubject = subj[0];
                try
                {
-                    String decodedSubject = subj[0];
+                    decodedSubject = MimeUtility.decodeText(decodedSubject);
                    try
                    {
                        decodedSubject = MimeUtility.decodeText(decodedSubject);
                    }
                    catch (UnsupportedEncodingException e)
                    {
                        logger.warn(e.toString());
                    }
                    putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
                }
-
+                catch (UnsupportedEncodingException e)
                /*
                 * Extract values from all header fields, including extension fields "X-"
                 */
                Set<String> keys = getExtractMapping().keySet();
                @SuppressWarnings("unchecked")
                Enumeration<Header> headers = mimeMessage.getAllHeaders();
                while (headers.hasMoreElements())
                {
-                    Header header = (Header) headers.nextElement();
+                    logger.warn(e.toString());
-                    if (keys.contains(header.getName()))
+                }
-                    {
+                putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
-                        tmp = header.getValue();
+            }
                        tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
-                        putRawValue(header.getName(), tmp, rawProperties);
+            /*
-                    }
+             * Extract values from all header fields, including extension fields "X-"
             */
            Set<String> keys = getExtractMapping().keySet();
            @SuppressWarnings("unchecked")
            Enumeration<Header> headers = mimeMessage.getAllHeaders();
            while (headers.hasMoreElements())
            {
                Header header = (Header) headers.nextElement();
                if (keys.contains(header.getName()))
                {
                    tmp = header.getValue();
                    tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
                    putRawValue(header.getName(), tmp, rawProperties);
                }
            }
        }
--- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java
+++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java
@@ -51,7 +51,6 @@ import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.Locator;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
 import java.io.OutputStream;
@@ -215,95 +214,93 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
    }
    @Override
-    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
+    public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
-                                                     File sourceFile) throws Exception
+            String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
            TransformManager transformManager) throws Exception
    {
        Map<String, Serializable> rawProperties = new HashMap<>();
-        try (InputStream is = new FileInputStream(sourceFile))
+        Parser parser = getParser();
        Metadata metadata = new Metadata();
        metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
        ParseContext context = buildParseContext(metadata, sourceMimetype);
        ContentHandler handler;
        Map<String,String> headers = null;
        if (needHeaderContents())
        {
-            Parser parser = getParser();
+            MapCaptureContentHandler headerCapture =
-
+                    new MapCaptureContentHandler();
-            Metadata metadata = new Metadata();
+            headers = headerCapture.tags;
-            metadata.add(Metadata.CONTENT_TYPE, sourceMimetype);
+            handler = new HeadContentHandler(headerCapture);
            ParseContext context = buildParseContext(metadata, sourceMimetype);
            ContentHandler handler;
            Map<String,String> headers = null;
            if (needHeaderContents())
            {
                MapCaptureContentHandler headerCapture =
                        new MapCaptureContentHandler();
                headers = headerCapture.tags;
                handler = new HeadContentHandler(headerCapture);
            }
            else
            {
                handler = new NullContentHandler();
            }
            parser.parse(is, handler, metadata, context);
            // First up, copy all the Tika metadata over
            // This allows people to map any of the Tika
            //  keys onto their own content model
            for (String tikaKey : metadata.names())
            {
                // TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
                putRawValue(tikaKey, getMetadataValue(metadata, Property.internalText(tikaKey)), rawProperties);
            }
            // Now, map the common Tika metadata keys onto
            //  the common Alfresco metadata keys. This allows
            //  existing mapping properties files to continue
            //  to work without needing any changes
            // The simple ones
            putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties);
            putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties);
            putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties);
            // Tags
            putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
            // Get the subject and description, despite things not
            //  being nearly as consistent as one might hope
            String subject = getMetadataValue(metadata, TikaCoreProperties.SUBJECT);
            String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION);
            if (subject != null && description != null)
            {
                putRawValue(KEY_DESCRIPTION, description, rawProperties);
                putRawValue(KEY_SUBJECT, subject, rawProperties);
            }
            else if (subject != null)
            {
                putRawValue(KEY_DESCRIPTION, subject, rawProperties);
                putRawValue(KEY_SUBJECT, subject, rawProperties);
            }
            else if (description != null)
            {
                putRawValue(KEY_DESCRIPTION, description, rawProperties);
                putRawValue(KEY_SUBJECT, description, rawProperties);
            }
            // Try for the dates two different ways too
            if (metadata.get(TikaCoreProperties.CREATED) != null)
            {
                putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties);
            }
            else if (metadata.get(TikaCoreProperties.MODIFIED) != null)
            {
                putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties);
            }
            // If people created a specific instance
            //  (eg OfficeMetadataExtractor), then allow that
            //  instance to map the Tika keys onto its
            //  existing namespace so that older properties
            //  files continue to map correctly
            rawProperties = extractSpecific(metadata, rawProperties, headers);
        }
        else
        {
            handler = new NullContentHandler();
        }
        parser.parse(inputStream, handler, metadata, context);
        // First up, copy all the Tika metadata over
        // This allows people to map any of the Tika
        //  keys onto their own content model
        for (String tikaKey : metadata.names())
        {
            // TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
            putRawValue(tikaKey, getMetadataValue(metadata, Property.internalText(tikaKey)), rawProperties);
        }
        // Now, map the common Tika metadata keys onto
        //  the common Alfresco metadata keys. This allows
        //  existing mapping properties files to continue
        //  to work without needing any changes
        // The simple ones
        putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties);
        putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties);
        putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties);
        // Tags
        putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);
        // Get the subject and description, despite things not
        //  being nearly as consistent as one might hope
        String subject = getMetadataValue(metadata, TikaCoreProperties.SUBJECT);
        String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION);
        if (subject != null && description != null)
        {
            putRawValue(KEY_DESCRIPTION, description, rawProperties);
            putRawValue(KEY_SUBJECT, subject, rawProperties);
        }
        else if (subject != null)
        {
            putRawValue(KEY_DESCRIPTION, subject, rawProperties);
            putRawValue(KEY_SUBJECT, subject, rawProperties);
        }
        else if (description != null)
        {
            putRawValue(KEY_DESCRIPTION, description, rawProperties);
            putRawValue(KEY_SUBJECT, description, rawProperties);
        }
        // Try for the dates two different ways too
        if (metadata.get(TikaCoreProperties.CREATED) != null)
        {
            putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties);
        }
        else if (metadata.get(TikaCoreProperties.MODIFIED) != null)
        {
            putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties);
        }
        // If people created a specific instance
        //  (eg OfficeMetadataExtractor), then allow that
        //  instance to map the Tika keys onto its
        //  existing namespace so that older properties
        //  files continue to map correctly
        rawProperties = extractSpecific(metadata, rawProperties, headers);
        return rawProperties;
    }
--- a/model/iptc/README.md
+++ b/model/iptc/README.md
--- a/model/iptc/iptc-model-context.xml
+++ b/model/iptc/iptc-model-context.xml
--- a/models/iptc/iptc-model.properties
+++ b/models/iptc/iptc-model.properties
--- a/models/iptc/iptc-model_cs.properties
+++ b/models/iptc/iptc-model_cs.properties
--- a/models/iptc/iptc-model_da.properties
+++ b/models/iptc/iptc-model_da.properties
--- a/models/iptc/iptc-model_de.properties
+++ b/models/iptc/iptc-model_de.properties
--- a/models/iptc/iptc-model_es.properties
+++ b/models/iptc/iptc-model_es.properties
--- a/models/iptc/iptc-model_fi.properties
+++ b/models/iptc/iptc-model_fi.properties
--- a/models/iptc/iptc-model_fr.properties
+++ b/models/iptc/iptc-model_fr.properties
--- a/models/iptc/iptc-model_it.properties
+++ b/models/iptc/iptc-model_it.properties
--- a/models/iptc/iptc-model_ja.properties
+++ b/models/iptc/iptc-model_ja.properties
--- a/models/iptc/iptc-model_nb.properties
+++ b/models/iptc/iptc-model_nb.properties
--- a/models/iptc/iptc-model_nl.properties
+++ b/models/iptc/iptc-model_nl.properties
--- a/models/iptc/iptc-model_pl.properties
+++ b/models/iptc/iptc-model_pl.properties
--- a/models/iptc/iptc-model_pt_BR.properties
+++ b/models/iptc/iptc-model_pt_BR.properties
--- a/models/iptc/iptc-model_ru.properties
+++ b/models/iptc/iptc-model_ru.properties
--- a/models/iptc/iptc-model_sv.properties
+++ b/models/iptc/iptc-model_sv.properties
--- a/models/iptc/iptc-model_zh_CN.properties
+++ b/models/iptc/iptc-model_zh_CN.properties
--- a/models/iptc/iptcModel.xml
+++ b/models/iptc/iptcModel.xml