REPO-5219 Allow AGS AMP to specify metadata extract mapping (REPO) (#169)

Added an optional extractMapping transform option to all metadata extractors to override the default mapping in the T-Engine. The AGS AMP extends RFC822MetadataExtracter with its own class to specify a different set of document-to-system property mappings. The class in the repo no longer performs extractions; instead it is used by the AsynchronousExtractor (which offloads extractions to T-Engines) to obtain the mappings when the class has been extended, and those mappings are then passed to the T-Engine.
2025-07-24 17:32:48 +00:00 · 2020-11-19 17:04:52 +00:00
parent fc0fa4e4b7
commit 33f37731c7
19 changed files with 373 additions and 393 deletions
--- a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java
+++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java
@@ -89,6 +89,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
    private static final String EMBED = "embed";
    private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract";
    private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed";
+    private static final String EXTRACT_MAPPING = "extractMapping";
    private static final String METADATA = "metadata";
    private static final Map<String, Serializable> EMPTY_METADATA = Collections.emptyMap();

@@ -102,6 +103,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
    private TransactionService transactionService;
    private TransformServiceRegistry transformServiceRegistry;
    private TaggingService taggingService;
+    private List<MetadataExtractorPropertyMappingOverride> metadataExtractorPropertyMappingOverrides = Collections.emptyList();

    public void setNodeService(NodeService nodeService)
    {
@@ -143,6 +145,11 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
        this.taggingService = taggingService;
    }

+    public void setMetadataExtractorPropertyMappingOverrides(List<MetadataExtractorPropertyMappingOverride> metadataExtractorPropertyMappingOverrides)
+    {
+        this.metadataExtractorPropertyMappingOverrides = metadataExtractorPropertyMappingOverrides;
+    }
+
    @Override
    protected Map<String, Set<QName>> getDefaultMapping()
    {
@@ -223,7 +230,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
    }

    @Override
-    // Not called. Overloaded method with the NodeRef is called.
+    // Not called. extractRawInThread is called.
    protected Map<String, Serializable> extractRaw(ContentReader reader)
    {
        return null;
@@ -233,12 +240,48 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
    protected Map<String, Serializable> extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
            throws Throwable
    {
-        long timeoutMs = limits.getTimeoutMs();
-        Map<String, String> options = Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs));
+        Map<String, String> options = getExtractOptions(nodeRef, reader, limits);
        transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, options);
        return EMPTY_METADATA;
    }

+    private Map<String, String> getExtractOptions(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
+    {
+        long timeoutMs = limits.getTimeoutMs();
+
+        // This is to allow the AGS (RM) AMP to specify the mapping of properties from the repository
+        // rather than doing it out of process in the T-Engine.
+        String sourceMimetype = reader.getMimetype();
+        for (MetadataExtractorPropertyMappingOverride override : metadataExtractorPropertyMappingOverrides)
+        {
+            if (override.match(sourceMimetype))
+            {
+                Map<String, Set<String>> extractMapping = override.getExtractMapping(nodeRef);
+                String extractMappingAsString = extractMappingToString(extractMapping);
+
+                Map<String, String> options = new HashMap<>(2);
+                options.put(TIMEOUT, Long.toString(timeoutMs));
+                options.put(EXTRACT_MAPPING, extractMappingAsString);
+                return options;
+            }
+        }
+
+        return Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs));
+    }
+
+    private String extractMappingToString(Map<String, Set<String>> map)
+    {
+        try
+        {
+            return jsonObjectMapper.writeValueAsString(map);
+        }
+        catch (JsonProcessingException e)
+        {
+            logger.error("Failed to save extractMapping as Json", e);
+            return null;
+        }
+    }
+
    @Override
    protected void embedInternal(NodeRef nodeRef, Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer)
    {
--- a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtractorPropertyMappingOverride.java
+++ b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtractorPropertyMappingOverride.java
@@ -0,0 +1,59 @@
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ *
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.service.cmr.repository.NodeRef;
+import org.alfresco.service.namespace.QName;
+
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * @deprecated as code running inside the content repository process that overrides metadata extract properties should
+ * be moved out of process to reduce coupling of components, making upgrade simpler.
+ *
+ * @author adavis
+ */
+@Deprecated
+public interface MetadataExtractorPropertyMappingOverride
+{
+    /**
+     * Indicates if the {@link #getExtractMapping(NodeRef)} will provide extract properties
+     * to override those in the T-Engine.
+     *
+     * @param sourceMimetype of the node.
+     * @return {@code true} if there will be override extract properties.
+     */
+    boolean match(String sourceMimetype);
+
+    /**
+     * Returns the extract mapping to be passed to the T-Engine.
+     *
+     * @param nodeRef of the node having its metadata extracted.
+     * @return the mapping of document properties to system properties
+     */
+    Map<String, Set<String>> getExtractMapping(NodeRef nodeRef);
+}
--- a/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java
+++ b/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java
@@ -25,33 +25,33 @@
 */
 package org.alfresco.repo.content.metadata;

-import java.io.IOException;
-import java.io.InputStream;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.cmr.repository.NodeRef;
+import org.alfresco.service.cmr.repository.NodeService;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import javax.mail.Header;
 import java.io.Serializable;
-import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
-import java.util.Date;
-import java.util.Enumeration;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;

-import javax.mail.Header;
-import javax.mail.internet.InternetAddress;
-import javax.mail.internet.MimeMessage;
-import javax.mail.internet.MimeUtility;
-import javax.mail.internet.MimeMessage.RecipientType;
-
-import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-
 /**
- * @deprecated OOTB extractors are being moved to T-Engines.
+ * @deprecated OOTB extractors have been moved to T-Engines.
 *
- * Metadata extractor for RFC822 mime emails.
+ * This class originally provided metadata extraction of RFC822 mimetype emails. It will no longer be used for that
+ * purpose as that work has been offloaded to a T-Engine via the AsynchronousExtractor. It still exists because the
+ * governance services (RM) AMP overrides it to provide alternate property mappings and to filter out some of
+ * these properties if the node does not have the "record" or "dod5015record" aspects.<p>
 *
- * Default configuration:   (see RFC822MetadataExtractor.properties)
+ * We also still have the default configuration file (RFC822MetadataExtracter.properties), which contains the
+ * default set of properties, which may be manipulated by RM.
 *
 * <pre>
 *   <b>messageFrom:</b>              --      imap:messageFrom, cm:originator
@@ -65,143 +65,49 @@ import org.alfresco.service.namespace.QName;
 *      <b>Message-ID:</b>            --      imap:messageId
 * </pre>
 *
- * @author Derek Hulley
- * @since 3.2
+ * This class now provides an alternative property mapping in the request to the T-Engine. Unlike the previous
+ * implementation, the filtering of properties takes place before rather than after the extraction. This is done in
+ * this class, making the code within the org.alfresco.module.org_alfresco_module_rm.email.RFC822MetadataExtracter
+ * filterSystemProperties method redundant.
+ *
+ * @author adavis
 */
@Deprecated
 public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
+        implements MetadataExtractorPropertyMappingOverride
 {
+    static final String RM_URI = "http://www.alfresco.org/model/recordsmanagement/1.0"; // namespace of ASPECT_RECORD
+    static final String DOD_URI = "http://www.alfresco.org/model/dod5015/1.0"; // namespace of ASPECT_DOD_5015_RECORD

-    protected static final String KEY_MESSAGE_FROM = "messageFrom";
-    protected static final String KEY_MESSAGE_TO = "messageTo";
-    protected static final String KEY_MESSAGE_CC = "messageCc";
-    protected static final String KEY_MESSAGE_SUBJECT = "messageSubject";
-    protected static final String KEY_MESSAGE_SENT = "messageSent";
-    protected static final String KEY_MESSAGE_RECEIVED = "messageReceived";
+    static final String RECORD = "record";
+    static final String DOD_5015_RECORD = "dod5015record";

-    public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_RFC822 };
+    static final QName ASPECT_RECORD = QName.createQName(RM_URI, RECORD);
+    static final QName ASPECT_DOD_5015_RECORD = QName.createQName(DOD_URI, DOD_5015_RECORD);
+
+    private static final Log logger = LogFactory.getLog(RFC822MetadataExtracter.class);
+
+    private static final HashSet<String> SUPPORTED_MIMETYPES =
+            new HashSet<>(Arrays.asList(new String[] { MimetypeMap.MIMETYPE_RFC822 }));

    public RFC822MetadataExtracter()
    {
-        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
+        super(SUPPORTED_MIMETYPES);
+    }
+
+    private NodeService nodeService;
+
+    public void setNodeService(NodeService nodeService)
+    {
+        this.nodeService = nodeService;
    }

    @Override
    protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
    {
-        Map<String, Serializable> rawProperties = newRawMap();
-
-        InputStream is = null;
-        try
-        {
-            is = reader.getContentInputStream();
-            MimeMessage mimeMessage = new MimeMessage(null, is);
-
-            if (mimeMessage != null)
-            {
-                /**
-                 * Extract RFC822 values that doesn't match to headers and need to be encoded.
-                 * Or those special fields that require some code to extract data
-                 */
-                String tmp = InternetAddress.toString(mimeMessage.getFrom());
-                tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
-                putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
-
-                tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
-                tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
-                putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
-
-                tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
-                tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
-                putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
-
-                putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
-
-                /**
-                 * Received field from RFC 822
-                 *
-                 * "Received"    ":"        ; one per relay
-                 *   ["from" domain]        ; sending host
-                 *   ["by"   domain]        ; receiving host
-                 *   ["via"  atom]          ; physical path
-                 *  ("with" atom)           ; link/mail protocol
-                 *   ["id"   msg-id]        ; receiver msg id
-                 *   ["for"  addr-spec]     ; initial form
-                 * ";"    date-time         ; time received
-                 */
-                Date rxDate = mimeMessage.getReceivedDate();
-
-                if(rxDate != null)
-                {
-                    // The email implementation extracted the received date for us.
-                    putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
-                }
-                else
-                {
-                    // the email implementation did not parse the received date for us.
-                    String[] rx = mimeMessage.getHeader("received");
-                    if(rx != null && rx.length > 0)
-                    {
-                        String lastReceived = rx[0];
-                        lastReceived = MimeUtility.unfold(lastReceived);
-                        int x = lastReceived.lastIndexOf(';');
-                        if(x > 0)
-                        {
-                            String dateStr = lastReceived.substring(x + 1).trim();
-                            putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
-                        }
-                    }
-                }
-
-                String[] subj = mimeMessage.getHeader("Subject");
-                if (subj != null && subj.length > 0)
-                {
-                    String decodedSubject = subj[0];
-                    try
-                    {
-                        decodedSubject = MimeUtility.decodeText(decodedSubject);
-                    }
-                    catch (UnsupportedEncodingException e)
-                    {
-                        logger.warn(e.toString());
-                    }
-                    putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
-                }
-
-                /*
-                 * Extract values from all header fields, including extension fields "X-"
-                 */
-                Set<String> keys = getMapping().keySet();
-                @SuppressWarnings("unchecked")
-                Enumeration<Header> headers = mimeMessage.getAllHeaders();
-                while (headers.hasMoreElements())
-                {
-                    Header header = (Header) headers.nextElement();
-                    if (keys.contains(header.getName()))
-                    {
-                        tmp = header.getValue();
-                        tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
-
-                        putRawValue(header.getName(), tmp, rawProperties);
-                    }
-                }
-            }
-        }
-        finally
-        {
-            if (is != null)
-            {
-                try
-                {
-                    is.close();
-                }
-                catch (IOException e)
-                {
-                }
-            }
-        }
-        // Done
-        return rawProperties;
+        logger.error("RFC822MetadataExtracter.extractRaw should not have been called, " +
+                "as the extraction should have taken place in a T-Engine.");
+        return Collections.emptyMap(); // will result in no updates.
    }

    /**
@@ -212,4 +118,46 @@ public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
    {
        return super.getMapping();
    }
+
+    @Override
+    public boolean match(String sourceMimetype)
+    {
+        // When RM overrides the "extracter.RFC822" bean with its own class, 'this' will be a subclass.
+        return SUPPORTED_MIMETYPES.contains(sourceMimetype) && this.getClass() != RFC822MetadataExtracter.class;
+    }
+
+    @Override
+    // Only include system properties depending on RM / DOD aspects on this nodeRef
+    public Map<String, Set<String>> getExtractMapping(NodeRef nodeRef)
+    {
+        Map<String, Set<QName>> customMapping = getMapping();
+        HashMap<String, Set<String>> mapping = new HashMap<>(customMapping.size());
+
+        boolean isARecord = nodeService.hasAspect(nodeRef, ASPECT_RECORD);
+        boolean isADodRecord = nodeService.hasAspect(nodeRef, ASPECT_DOD_5015_RECORD);
+
+        for (Map.Entry<String, Set<QName>> entry : customMapping.entrySet())
+        {
+            Set<QName> customSystemProperties = entry.getValue();
+            HashSet<String> systemProperties = new HashSet<>(customSystemProperties.size());
+            String documentProperty = entry.getKey();
+
+            for (QName customSystemProperty : customSystemProperties)
+            {
+                String uri = customSystemProperty.getNamespaceURI();
+                boolean rmProperty = RM_URI.equals(uri);
+                boolean dodProperty = DOD_URI.equals(uri);
+                if ((rmProperty && isARecord) || (dodProperty && isADodRecord) || (!rmProperty && !dodProperty))
+                {
+                    systemProperties.add(customSystemProperty.toString());
+                }
+            }
+            if (!systemProperties.isEmpty())
+            {
+                mapping.put(documentProperty, systemProperties);
+            }
+        }
+
+        return mapping;
+    }
 }
--- a/repository/src/main/resources/alfresco/content-services-context.xml
+++ b/repository/src/main/resources/alfresco/content-services-context.xml
@@ -299,8 +299,25 @@
      <property name="transactionService" ref="transactionService" />
      <property name="transformServiceRegistry" ref="transformServiceRegistry" />
      <property name="taggingService" ref="taggingService" />
+      <property name="metadataExtractorPropertyMappingOverrides">
+         <list>
+             <ref bean="extracter.RFC822" /> <!-- The RM AMP overrides this bean, extending the base class -->
+         </list>
+      </property>
   </bean>

+    <!-- No longer used as an extractor but still extended by RM to provide additional mappings -->
+    <bean id="extracter.RFC822"        class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter"        parent="baseMetadataExtracter" >
+        <property name="nodeService" ref="nodeService"/>
+        <property name="supportedDateFormats">
+            <list>
+                <value>EEE, d MMM yyyy HH:mm:ss Z</value>
+                <value>EEE, d MMM yy HH:mm:ss Z</value>
+                <value>d MMM yyyy HH:mm:ss Z</value>
+            </list>
+        </property>
+    </bean>
+
   <!-- Content Transformation Regisitry -->
   <bean id="contentTransformerRegistry" class="org.alfresco.repo.content.transform.ContentTransformerRegistry" >
      <constructor-arg>
--- a/repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties
@@ -1,12 +0,0 @@
-#
-# DWGMetadataExtracter - default mapping
-#
-# author: Nick Burch
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
--- a/repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties
@@ -1,12 +0,0 @@
-#
-# HtmlMetadataExtracter - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
--- a/repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties
@@ -1,12 +0,0 @@
-#
-# JodConverterMetadataExtracter - default mapping
-#
-# author: Neil McErlean
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
--- a/repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties
@@ -1,30 +0,0 @@
-#
-# MP3MetadataExtracter - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
-
-# Core mappings
-author=cm:author
-title=cm:title
-description=cm:description
-created=cm:created
-
-# Audio descriptive mappings
-xmpDM\:album=audio:album
-xmpDM\:artist=audio:artist
-xmpDM\:composer=audio:composer
-xmpDM\:engineer=audio:engineer
-xmpDM\:genre=audio:genre
-xmpDM\:trackNumber=audio:trackNumber
-xmpDM\:releaseDate=audio:releaseDate
-#xmpDM:logComment
-
-# Audio specific mappings
-xmpDM\:audioSampleRate=audio:sampleRate
-xmpDM\:audioSampleType=audio:sampleType
-xmpDM\:audioChannelType=audio:channelType
-xmpDM\:audioCompressor=audio:compressor
--- a/repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties
@@ -1,14 +0,0 @@
-#
-# MailMetadataExtracter - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-sentDate=cm:sentdate
-originator=cm:originator, cm:author
-addressee=cm:addressee
-addressees=cm:addressees
-subjectLine=cm:subjectline, cm:description
--- a/repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties
@@ -1,14 +0,0 @@
-#
-# OfficeMetadataExtracter - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-subject=cm:description
-createDateTime=cm:created
-lastSaveDateTime=cm:modified
--- a/repository/src/main/resources/alfresco/metadata/OpenDocumentMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/OpenDocumentMetadataExtracter.properties
@@ -1,21 +0,0 @@
-#
-# OpenDocumentMetadataExtracter - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-creationDate=cm:created
-creator=cm:author
-date=
-description=
-generator=
-initialCreator=
-keyword=
-language=
-printDate=
-printedBy=
-subject=cm:description
-title=cm:title
--- a/repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties
@@ -1,13 +0,0 @@
-#
-# PdfBoxMetadataExtracter - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-subject=cm:description
-created=cm:created
--- a/repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties
@@ -1,13 +0,0 @@
-#
-# PoiMetadataExtracter - default mapping
-#
-# author: Neil McErlean
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
-created=cm:created
--- a/repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties
@@ -1,34 +0,0 @@
-#
-# TikaAudioMetadataExtracter - audio mapping
-#
-# This is used to map from the Tika audio metadata onto your
-#  content model. This will be used for any Audio content
-#  for which an explicit extractor isn't defined
-#
-# author: Nick Burch
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
-
-# Core mappings
-author=cm:author
-title=cm:title
-description=cm:description
-created=cm:created
-
-# Audio descriptive mappings
-xmpDM\:album=audio:album
-xmpDM\:artist=audio:artist
-xmpDM\:composer=audio:composer
-xmpDM\:engineer=audio:engineer
-xmpDM\:genre=audio:genre
-xmpDM\:trackNumber=audio:trackNumber
-xmpDM\:releaseDate=audio:releaseDate
-#xmpDM:logComment
-
-# Audio specific mappings
-xmpDM\:audioSampleRate=audio:sampleRate
-xmpDM\:audioSampleType=audio:sampleType
-xmpDM\:audioChannelType=audio:channelType
-xmpDM\:audioCompressor=audio:compressor
--- a/repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties
@@ -1,52 +0,0 @@
-#
-# TikaAutoMetadataExtracter - default mapping
-#
-# This is used to map from the Tika and standard namespaces
-#  onto your content model. This will be used for any
-#  content for which an explicit extractor isn't defined,
-#  by using Tika's auto-selection facilities.
-#
-# author: Nick Burch
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
-namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
-created=cm:created
-
-geo\:lat=cm:latitude
-geo\:long=cm:longitude
-
-tiff\:ImageWidth=exif:pixelXDimension
-tiff\:ImageLength=exif:pixelYDimension
-tiff\:Make=exif:manufacturer
-tiff\:Model=exif:model
-tiff\:Software=exif:software
-tiff\:Orientation=exif:orientation
-tiff\:XResolution=exif:xResolution
-tiff\:YResolution=exif:yResolution
-tiff\:ResolutionUnit=exif:resolutionUnit
-exif\:Flash=exif:flash
-exif\:ExposureTime=exif:exposureTime
-exif\:FNumber=exif:fNumber
-exif\:FocalLength=exif:focalLength
-exif\:IsoSpeedRatings=exif:isoSpeedRatings
-exif\:DateTimeOriginal=exif:dateTimeOriginal
-
-xmpDM\:album=audio:album
-xmpDM\:artist=audio:artist
-xmpDM\:composer=audio:composer
-xmpDM\:engineer=audio:engineer
-xmpDM\:genre=audio:genre
-xmpDM\:trackNumber=audio:trackNumber
-xmpDM\:releaseDate=audio:releaseDate
-#xmpDM:logComment
-xmpDM\:audioSampleRate=audio:sampleRate
-xmpDM\:audioSampleType=audio:sampleType
-xmpDM\:audioChannelType=audio:channelType
-xmpDM\:audioCompressor=audio:compressor
--- a/repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties
+++ b/repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties
@@ -1,20 +0,0 @@
-#
-# TikaSpringConfiguredMetadataExtracter.properties - default mapping
-#
-# This is used to map from the Tika and standard namespaces
-#  onto your content model. This is used for custom tika parsers,
-#  but one file is used across all custom parsers.
-#
-# author: Nick Burch
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
-created=cm:created
-
-geo\:lat=cm:latitude
-geo\:long=cm:longitude
--- a/repository/src/test/java/org/alfresco/MiscContextTestSuite.java
+++ b/repository/src/test/java/org/alfresco/MiscContextTestSuite.java
@@ -72,7 +72,8 @@ import org.springframework.context.ApplicationContext;
    org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest.class,
    org.alfresco.repo.content.transform.ArchiveContentTransformerTest.class,

-    // Metadata tests - replaced with simplified test in LocalRenditionTest and ServiceRenditionTest
+    // Metadata tests - replaced with simplified tests in LocalRenditionTest and ServiceRenditionTest
+    org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest.class,
    org.alfresco.repo.content.metadata.MappingMetadataExtracterTest.class,

    // ----------------------------------------------------------------------
--- a/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java
+++ b/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java
@@ -0,0 +1,159 @@
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ *
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.NodeRef;
+import org.alfresco.service.cmr.repository.NodeService;
+import org.alfresco.service.namespace.QName;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.StringJoiner;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_DOD_5015_RECORD;
+import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_RECORD;
+import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.DOD_URI;
+import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.RM_URI;
+import static org.alfresco.service.namespace.NamespaceService.CONTENT_MODEL_1_0_URI;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests {@link RFC822MetadataExtracter} when it is overridden, as RM does, to control which properties are
+ * extracted by T-Engines. RFC822MetadataExtracter itself no longer extracts.
+ *
+ * @author adavis
+ */
+public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private RFC822MetadataExtracter extracter;
+    private RFC822MetadataExtracter rmExtracter;
+    @Mock private NodeService mockNodeService;
+
+    private final NodeRef nodeRefWithDodRecord = new NodeRef("workspace://spacesStore/test-dod");
+    private final NodeRef nodeRefWithRecord = new NodeRef("workspace://spacesStore/test-rm");
+    private final NodeRef nodeRefWithBoth = new NodeRef("workspace://spacesStore/test-both");
+    private final NodeRef nodeRefWithNeither = new NodeRef("workspace://spacesStore/test-neither");
+
+    private static final QName MESSAGE_FROM_TEST_PROPERTY = QName.createQName("MessageFromTest");
+    private static final QName MESSAGE_TO_TEST_PROPERTY = QName.createQName("MessageToTest");
+    private static final QName MESSAGE_CC_TEST_PROPERTY = QName.createQName("MessageCCTest");
+
+    @Override
+    public void setUp() throws Exception
+    {
+        super.setUp();
+
+        extracter = (RFC822MetadataExtracter) ctx.getBean("extracter.RFC822");
+
+        MockitoAnnotations.initMocks(this);
+        // Only the aspects stubbed below are reported as present; an unstubbed hasAspect call returns false.
+        when(mockNodeService.hasAspect(nodeRefWithDodRecord, ASPECT_DOD_5015_RECORD)).thenReturn(true);
+        when(mockNodeService.hasAspect(nodeRefWithRecord, ASPECT_RECORD)).thenReturn(true);
+        when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_DOD_5015_RECORD)).thenReturn(true);
+        when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_RECORD)).thenReturn(true);
+
+        rmExtracter = new RFC822MetadataExtracter()
+        {
+            // Needed so the init method runs.
+            @Override
+            protected Map<String, Set<QName>> getDefaultMapping()
+            {
+                return Collections.emptyMap();
+            }
+        };
+        rmExtracter.setNodeService(mockNodeService);
+        rmExtracter.init();
+    }
+
+    @Override
+    protected MetadataExtracter getExtracter()
+    {
+        return extracter;
+    }
+
+    @Override
+    protected void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
+    {
+        // ignore as this is no longer an extractor
+    }
+
+    public void testMatch()
+    {
+        assertFalse("Normal class should never match", extracter.match(MimetypeMap.MIMETYPE_RFC822));
+        assertTrue("RM class should match with correct type", rmExtracter.match(MimetypeMap.MIMETYPE_RFC822));
+        assertFalse("RM class should not match with other types", rmExtracter.match(MimetypeMap.MIMETYPE_PDF));
+    }
+
+    /** Checks that rm and dod mappings are only returned for nodes that have the corresponding aspect. */
+    public void testGetExtractMapping()
+    {
+        Properties properties = new Properties();
+        properties.put("namespace.prefix.rm", RM_URI);
+        properties.put("namespace.prefix.dod", DOD_URI);
+        properties.put("namespace.prefix.cm", CONTENT_MODEL_1_0_URI);
+        properties.put("a", "cm:a");
+        properties.put("b", "rm:b, dod:b");
+        properties.put("c", "rm:c");
+        properties.put("d", "cm:d, rm:d1, rm:d2");
+        rmExtracter.setMappingProperties(properties);
+
+        assertEquals("No properties should have been removed", 7, countSystemProperties(nodeRefWithBoth));
+        assertEquals("The 1 dod and 4 record properties should have been removed", 2, countSystemProperties(nodeRefWithNeither));
+        assertEquals("The 4 record properties should have been removed", 3, countSystemProperties(nodeRefWithDodRecord));
+        assertEquals("The 1 dod property should have been removed", 6, countSystemProperties(nodeRefWithRecord));
+
+        // Check that we have the fully qualified version as the T-Engines know nothing about the repo's prefixes.
+        // Check just one of them. The names are sorted so the check does not rely on map or set iteration order.
+        assertEquals("{http://www.alfresco.org/model/content/1.0}a, " +
+                "{http://www.alfresco.org/model/content/1.0}d, " +
+                "{http://www.alfresco.org/model/dod5015/1.0}b", getSystemProperties(nodeRefWithDodRecord));
+    }
+
+    /** Returns the total number of system properties in the extract mapping obtained for the node. */
+    private int countSystemProperties(NodeRef nodeRef)
+    {
+        Map<String, Set<String>> extractMapping = rmExtracter.getExtractMapping(nodeRef);
+        AtomicInteger count = new AtomicInteger();
+        extractMapping.forEach((k,v) -> count.addAndGet(v.size()));
+        return count.get();
+    }
+
+    /** Returns the system property names in the node's extract mapping, sorted and joined with ", ". */
+    private String getSystemProperties(NodeRef nodeRef)
+    {
+        Map<String, Set<String>> extractMapping = rmExtracter.getExtractMapping(nodeRef);
+        StringJoiner sj = new StringJoiner(", ");
+        extractMapping.values().stream().flatMap(Set::stream).sorted().forEachOrdered(sj::add);
+        return sj.toString();
+    }
+}