mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
REPO-5219 Allow AGS AMP to specify metadata extract mapping (REPO) (#169)
Added an optional extractMapping transform option to all metadata extractors to override the default mapping in the T-Engine. The AGS AMP extends RFC822MetadataExtracter with its own class to specify a different set of document-to-system property mappings. The class in the repo no longer performs extractions; instead, the AsynchronousExtractor (which offloads extractions to T-Engines) uses it, when it has been extended, to obtain the mappings that are then passed to the T-Engine.
This commit is contained in:
@@ -89,6 +89,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
|
||||
private static final String EMBED = "embed";
|
||||
private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract";
|
||||
private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed";
|
||||
private static final String EXTRACT_MAPPING = "extractMapping";
|
||||
private static final String METADATA = "metadata";
|
||||
private static final Map<String, Serializable> EMPTY_METADATA = Collections.emptyMap();
|
||||
|
||||
@@ -102,6 +103,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
|
||||
private TransactionService transactionService;
|
||||
private TransformServiceRegistry transformServiceRegistry;
|
||||
private TaggingService taggingService;
|
||||
private List<MetadataExtractorPropertyMappingOverride> metadataExtractorPropertyMappingOverrides = Collections.emptyList();
|
||||
|
||||
public void setNodeService(NodeService nodeService)
|
||||
{
|
||||
@@ -143,6 +145,11 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
|
||||
this.taggingService = taggingService;
|
||||
}
|
||||
|
||||
public void setMetadataExtractorPropertyMappingOverrides(List<MetadataExtractorPropertyMappingOverride> metadataExtractorPropertyMappingOverrides)
|
||||
{
|
||||
this.metadataExtractorPropertyMappingOverrides = metadataExtractorPropertyMappingOverrides;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Set<QName>> getDefaultMapping()
|
||||
{
|
||||
@@ -223,7 +230,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
|
||||
}
|
||||
|
||||
@Override
|
||||
// Not called. Overloaded method with the NodeRef is called.
|
||||
// Not called. extractRawInThread is called.
|
||||
protected Map<String, Serializable> extractRaw(ContentReader reader)
|
||||
{
|
||||
return null;
|
||||
@@ -233,12 +240,48 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
|
||||
protected Map<String, Serializable> extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
|
||||
throws Throwable
|
||||
{
|
||||
long timeoutMs = limits.getTimeoutMs();
|
||||
Map<String, String> options = Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs));
|
||||
Map<String, String> options = getExtractOptions(nodeRef, reader, limits);
|
||||
transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, options);
|
||||
return EMPTY_METADATA;
|
||||
}
|
||||
|
||||
private Map<String, String> getExtractOptions(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
|
||||
{
|
||||
long timeoutMs = limits.getTimeoutMs();
|
||||
|
||||
// This is to allow the AGS (RM) AMP to specify the mapping of properties from the repository
|
||||
// rather than doing it out of process in the T-Engine.
|
||||
String sourceMimetype = reader.getMimetype();
|
||||
for (MetadataExtractorPropertyMappingOverride override : metadataExtractorPropertyMappingOverrides)
|
||||
{
|
||||
if (override.match(sourceMimetype))
|
||||
{
|
||||
Map<String, Set<String>> extractMapping = override.getExtractMapping(nodeRef);
|
||||
String extractMappingAsString = extractMappingToString(extractMapping);
|
||||
|
||||
Map<String, String> options = new HashMap<>(2);
|
||||
options.put(TIMEOUT, Long.toString(timeoutMs));
|
||||
options.put(EXTRACT_MAPPING, extractMappingAsString);
|
||||
return options;
|
||||
}
|
||||
}
|
||||
|
||||
return Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs));
|
||||
}
|
||||
|
||||
private String extractMappingToString(Map<String, Set<String>> map)
|
||||
{
|
||||
try
|
||||
{
|
||||
return jsonObjectMapper.writeValueAsString(map);
|
||||
}
|
||||
catch (JsonProcessingException e)
|
||||
{
|
||||
logger.error("Failed to save extractMapping as Json", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void embedInternal(NodeRef nodeRef, Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer)
|
||||
{
|
||||
|
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Repository
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @deprecated as code running inside the content repository process that overrides metadata extract properties should
|
||||
* be moved out of process to reduce coupling of components, making upgrade simpler.
|
||||
*
|
||||
* @author adavis
|
||||
*/
|
||||
@Deprecated
|
||||
public interface MetadataExtractorPropertyMappingOverride
|
||||
{
|
||||
/**
|
||||
* Indicates if the {@link #getExtractMapping(NodeRef)} will provide extract properties
|
||||
* to override those in the T-Engine.
|
||||
*
|
||||
* @param sourceMimetype of the node.
|
||||
* @return {@code true} if there will be override extract properties.
|
||||
*/
|
||||
boolean match(String sourceMimetype);
|
||||
|
||||
/**
|
||||
* Returns the extract mapping to be passed to the T-Engine.
|
||||
*
|
||||
* @param nodeRef of the node having its metadata extracted.
|
||||
* @return the mapping of document properties to system properties
|
||||
*/
|
||||
Map<String, Set<String>> getExtractMapping(NodeRef nodeRef);
|
||||
}
|
@@ -25,33 +25,33 @@
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.NodeService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import javax.mail.Header;
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.mail.Header;
|
||||
import javax.mail.internet.InternetAddress;
|
||||
import javax.mail.internet.MimeMessage;
|
||||
import javax.mail.internet.MimeUtility;
|
||||
import javax.mail.internet.MimeMessage.RecipientType;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
* @deprecated OOTB extractors are being moved to T-Engines.
|
||||
* @deprecated OOTB extractors have been moved to T-Engines.
|
||||
*
|
||||
* Metadata extractor for RFC822 mime emails.
|
||||
* This class originally provided metadata extraction of RFC822 mimetype emails. It will no longer be used for that
|
||||
* purpose as that work has been offloaded to a T-Engine via the AsynchronousExtractor. It still exists because the
|
||||
* governance services (RM) AMP overrides it to provide alternate property mappings and to filter out some of
|
||||
* these properties if the node does not have the "record" or "dod5015record" aspects.<p>
|
||||
*
|
||||
* Default configuration: (see RFC822MetadataExtractor.properties)
|
||||
* We still also have the default configuration file (RFC822MetadataExtracter.properties), which contains the
|
||||
* default set of properties, which may be manipulated by RM.
|
||||
*
|
||||
* <pre>
|
||||
* <b>messageFrom:</b> -- imap:messageFrom, cm:originator
|
||||
@@ -65,143 +65,49 @@ import org.alfresco.service.namespace.QName;
|
||||
* <b>Message-ID:</b> -- imap:messageId
|
||||
* </pre>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @since 3.2
|
||||
* This class now provides an alternative property mapping in the request to the T-Engine. Unlike the previous
|
||||
* implementation the filtering of properties takes place before rather than after the extraction. This is done in
|
||||
* this class making the code within the org.alfresco.module.org_alfresco_module_rm.email.RFC822MetadataExtracter
|
||||
* filterSystemProperties method redundant.
|
||||
*
|
||||
* @author adavis
|
||||
*/
|
||||
@Deprecated
|
||||
public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
implements MetadataExtractorPropertyMappingOverride
|
||||
{
|
||||
static String RM_URI = "http://www.alfresco.org/model/recordsmanagement/1.0";
|
||||
static String DOD_URI = "http://www.alfresco.org/model/dod5015/1.0";
|
||||
|
||||
protected static final String KEY_MESSAGE_FROM = "messageFrom";
|
||||
protected static final String KEY_MESSAGE_TO = "messageTo";
|
||||
protected static final String KEY_MESSAGE_CC = "messageCc";
|
||||
protected static final String KEY_MESSAGE_SUBJECT = "messageSubject";
|
||||
protected static final String KEY_MESSAGE_SENT = "messageSent";
|
||||
protected static final String KEY_MESSAGE_RECEIVED = "messageReceived";
|
||||
static final String RECORD = "record";
|
||||
static final String DOD_5015_RECORD = "dod5015record";
|
||||
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_RFC822 };
|
||||
static final QName ASPECT_RECORD = QName.createQName(RM_URI, RECORD);
|
||||
static final QName ASPECT_DOD_5015_RECORD = QName.createQName(DOD_URI, DOD_5015_RECORD);
|
||||
|
||||
private static Log logger = LogFactory.getLog(RFC822MetadataExtracter.class);
|
||||
|
||||
private static final HashSet<String> SUPPORTED_MIMETYPES =
|
||||
new HashSet<>(Arrays.asList(new String[] { MimetypeMap.MIMETYPE_RFC822 }));
|
||||
|
||||
public RFC822MetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
super(SUPPORTED_MIMETYPES);
|
||||
}
|
||||
|
||||
private NodeService nodeService;
|
||||
|
||||
/**
 * Sets the node service that {@link #getExtractMapping(NodeRef)} uses to check which record aspects a node has.
 *
 * @param nodeService the node service to use.
 */
public void setNodeService(NodeService nodeService)
|
||||
{
|
||||
this.nodeService = nodeService;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
Map<String, Serializable> rawProperties = newRawMap();
|
||||
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
MimeMessage mimeMessage = new MimeMessage(null, is);
|
||||
|
||||
if (mimeMessage != null)
|
||||
{
|
||||
/**
|
||||
* Extract RFC822 values that doesn't match to headers and need to be encoded.
|
||||
* Or those special fields that require some code to extract data
|
||||
*/
|
||||
String tmp = InternetAddress.toString(mimeMessage.getFrom());
|
||||
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
|
||||
putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
|
||||
|
||||
tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
|
||||
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
|
||||
putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
|
||||
|
||||
tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
|
||||
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
|
||||
putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
|
||||
|
||||
putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
|
||||
|
||||
/**
|
||||
* Received field from RFC 822
|
||||
*
|
||||
* "Received" ":" ; one per relay
|
||||
* ["from" domain] ; sending host
|
||||
* ["by" domain] ; receiving host
|
||||
* ["via" atom] ; physical path
|
||||
* ("with" atom) ; link/mail protocol
|
||||
* ["id" msg-id] ; receiver msg id
|
||||
* ["for" addr-spec] ; initial form
|
||||
* ";" date-time ; time received
|
||||
*/
|
||||
Date rxDate = mimeMessage.getReceivedDate();
|
||||
|
||||
if(rxDate != null)
|
||||
{
|
||||
// The email implementation extracted the received date for us.
|
||||
putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
|
||||
}
|
||||
else
|
||||
{
|
||||
// the email implementation did not parse the received date for us.
|
||||
String[] rx = mimeMessage.getHeader("received");
|
||||
if(rx != null && rx.length > 0)
|
||||
{
|
||||
String lastReceived = rx[0];
|
||||
lastReceived = MimeUtility.unfold(lastReceived);
|
||||
int x = lastReceived.lastIndexOf(';');
|
||||
if(x > 0)
|
||||
{
|
||||
String dateStr = lastReceived.substring(x + 1).trim();
|
||||
putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String[] subj = mimeMessage.getHeader("Subject");
|
||||
if (subj != null && subj.length > 0)
|
||||
{
|
||||
String decodedSubject = subj[0];
|
||||
try
|
||||
{
|
||||
decodedSubject = MimeUtility.decodeText(decodedSubject);
|
||||
}
|
||||
catch (UnsupportedEncodingException e)
|
||||
{
|
||||
logger.warn(e.toString());
|
||||
}
|
||||
putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract values from all header fields, including extension fields "X-"
|
||||
*/
|
||||
Set<String> keys = getMapping().keySet();
|
||||
@SuppressWarnings("unchecked")
|
||||
Enumeration<Header> headers = mimeMessage.getAllHeaders();
|
||||
while (headers.hasMoreElements())
|
||||
{
|
||||
Header header = (Header) headers.nextElement();
|
||||
if (keys.contains(header.getName()))
|
||||
{
|
||||
tmp = header.getValue();
|
||||
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
|
||||
|
||||
putRawValue(header.getName(), tmp, rawProperties);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
is.close();
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
// Done
|
||||
return rawProperties;
|
||||
logger.error("RFC822MetadataExtracter.extractRaw should not have been called, " +
|
||||
"as the extraction should have taken place in a T-Engine.");
|
||||
return Collections.emptyMap(); // will result in no updates.
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -212,4 +118,46 @@ public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
return super.getMapping();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(String sourceMimetype)
|
||||
{
|
||||
// When RM overrides the "extracter.RFC822" bean with its own class 'this' will be a sub class.
|
||||
return SUPPORTED_MIMETYPES.contains(sourceMimetype) && this.getClass() != RFC822MetadataExtracter.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
// Only include system properties depending on RM / DOD aspects on this nodeRef
|
||||
public Map<String, Set<String>> getExtractMapping(NodeRef nodeRef)
|
||||
{
|
||||
Map<String, Set<QName>> customMapping = getMapping();
|
||||
HashMap<String, Set<String>> mapping = new HashMap<>(customMapping.size());
|
||||
|
||||
boolean isARecord = nodeService.hasAspect(nodeRef, ASPECT_RECORD);
|
||||
boolean isADodRecord = nodeService.hasAspect(nodeRef, ASPECT_DOD_5015_RECORD);
|
||||
|
||||
for (Map.Entry<String, Set<QName>> entry : customMapping.entrySet())
|
||||
{
|
||||
Set<QName> customSystemProperties = entry.getValue();
|
||||
HashSet<String> systemProperties = new HashSet<>(customSystemProperties.size());
|
||||
String documentProperty = entry.getKey();
|
||||
|
||||
for (QName customSystemProperty : customSystemProperties)
|
||||
{
|
||||
String uri = customSystemProperty.getNamespaceURI();
|
||||
boolean rmProperty = RM_URI.equals(uri);
|
||||
boolean dodProperty = DOD_URI.equals(uri);
|
||||
if ((rmProperty && isARecord) || (dodProperty && isADodRecord) || (!rmProperty && !dodProperty))
|
||||
{
|
||||
systemProperties.add(customSystemProperty.toString());
|
||||
}
|
||||
}
|
||||
if (!systemProperties.isEmpty())
|
||||
{
|
||||
mapping.put(documentProperty, systemProperties);
|
||||
}
|
||||
}
|
||||
|
||||
return mapping;
|
||||
}
|
||||
}
|
||||
|
@@ -299,8 +299,25 @@
|
||||
<property name="transactionService" ref="transactionService" />
|
||||
<property name="transformServiceRegistry" ref="transformServiceRegistry" />
|
||||
<property name="taggingService" ref="taggingService" />
|
||||
<property name="metadataExtractorPropertyMappingOverrides">
|
||||
<list>
|
||||
<ref bean="extracter.RFC822" /> <!-- The RM AMP overrides this bean, extending the base class -->
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
<!-- No longer used as an extractor but still extended by RM to provide additional mappings -->
|
||||
<bean id="extracter.RFC822" class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter" parent="baseMetadataExtracter" >
|
||||
<property name="nodeService" ref="nodeService"/>
|
||||
<property name="supportedDateFormats">
|
||||
<list>
|
||||
<value>EEE, d MMM yyyy HH:mm:ss Z</value>
|
||||
<value>EEE, d MMM yy HH:mm:ss Z</value>
|
||||
<value>d MMM yyyy HH:mm:ss Z</value>
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
<!-- Content Transformation Registry -->
|
||||
<bean id="contentTransformerRegistry" class="org.alfresco.repo.content.transform.ContentTransformerRegistry" >
|
||||
<constructor-arg>
|
||||
|
@@ -1,12 +0,0 @@
|
||||
#
|
||||
# DWGMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Nick Burch
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
@@ -1,12 +0,0 @@
|
||||
#
|
||||
# HtmlMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
@@ -1,12 +0,0 @@
|
||||
#
|
||||
# JodConverterMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Neil McErlean
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
@@ -1,30 +0,0 @@
|
||||
#
|
||||
# MP3MetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||
|
||||
# Core mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
# Audio descriptive mappings
|
||||
xmpDM\:album=audio:album
|
||||
xmpDM\:artist=audio:artist
|
||||
xmpDM\:composer=audio:composer
|
||||
xmpDM\:engineer=audio:engineer
|
||||
xmpDM\:genre=audio:genre
|
||||
xmpDM\:trackNumber=audio:trackNumber
|
||||
xmpDM\:releaseDate=audio:releaseDate
|
||||
#xmpDM:logComment
|
||||
|
||||
# Audio specific mappings
|
||||
xmpDM\:audioSampleRate=audio:sampleRate
|
||||
xmpDM\:audioSampleType=audio:sampleType
|
||||
xmpDM\:audioChannelType=audio:channelType
|
||||
xmpDM\:audioCompressor=audio:compressor
|
@@ -1,14 +0,0 @@
|
||||
#
|
||||
# MailMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
sentDate=cm:sentdate
|
||||
originator=cm:originator, cm:author
|
||||
addressee=cm:addressee
|
||||
addressees=cm:addressees
|
||||
subjectLine=cm:subjectline, cm:description
|
@@ -1,14 +0,0 @@
|
||||
#
|
||||
# OfficeMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
subject=cm:description
|
||||
createDateTime=cm:created
|
||||
lastSaveDateTime=cm:modified
|
@@ -1,21 +0,0 @@
|
||||
#
|
||||
# OpenDocumentMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
creationDate=cm:created
|
||||
creator=cm:author
|
||||
date=
|
||||
description=
|
||||
generator=
|
||||
initialCreator=
|
||||
keyword=
|
||||
language=
|
||||
printDate=
|
||||
printedBy=
|
||||
subject=cm:description
|
||||
title=cm:title
|
@@ -1,13 +0,0 @@
|
||||
#
|
||||
# PdfBoxMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
subject=cm:description
|
||||
created=cm:created
|
@@ -1,13 +0,0 @@
|
||||
#
|
||||
# PoiMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Neil McErlean
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
@@ -1,34 +0,0 @@
|
||||
#
|
||||
# TikaAudioMetadataExtracter - audio mapping
|
||||
#
|
||||
# This is used to map from the Tika audio metadata onto your
|
||||
# content model. This will be used for any Audio content
|
||||
# for which an explicit extractor isn't defined
|
||||
#
|
||||
# author: Nick Burch
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||
|
||||
# Core mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
# Audio descriptive mappings
|
||||
xmpDM\:album=audio:album
|
||||
xmpDM\:artist=audio:artist
|
||||
xmpDM\:composer=audio:composer
|
||||
xmpDM\:engineer=audio:engineer
|
||||
xmpDM\:genre=audio:genre
|
||||
xmpDM\:trackNumber=audio:trackNumber
|
||||
xmpDM\:releaseDate=audio:releaseDate
|
||||
#xmpDM:logComment
|
||||
|
||||
# Audio specific mappings
|
||||
xmpDM\:audioSampleRate=audio:sampleRate
|
||||
xmpDM\:audioSampleType=audio:sampleType
|
||||
xmpDM\:audioChannelType=audio:channelType
|
||||
xmpDM\:audioCompressor=audio:compressor
|
@@ -1,52 +0,0 @@
|
||||
#
|
||||
# TikaAutoMetadataExtracter - default mapping
|
||||
#
|
||||
# This is used to map from the Tika and standard namespaces
|
||||
# onto your content model. This will be used for any
|
||||
# content for which an explicit extractor isn't defined,
|
||||
# by using Tika's auto-selection facilities.
|
||||
#
|
||||
# author: Nick Burch
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
|
||||
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
geo\:lat=cm:latitude
|
||||
geo\:long=cm:longitude
|
||||
|
||||
tiff\:ImageWidth=exif:pixelXDimension
|
||||
tiff\:ImageLength=exif:pixelYDimension
|
||||
tiff\:Make=exif:manufacturer
|
||||
tiff\:Model=exif:model
|
||||
tiff\:Software=exif:software
|
||||
tiff\:Orientation=exif:orientation
|
||||
tiff\:XResolution=exif:xResolution
|
||||
tiff\:YResolution=exif:yResolution
|
||||
tiff\:ResolutionUnit=exif:resolutionUnit
|
||||
exif\:Flash=exif:flash
|
||||
exif\:ExposureTime=exif:exposureTime
|
||||
exif\:FNumber=exif:fNumber
|
||||
exif\:FocalLength=exif:focalLength
|
||||
exif\:IsoSpeedRatings=exif:isoSpeedRatings
|
||||
exif\:DateTimeOriginal=exif:dateTimeOriginal
|
||||
|
||||
xmpDM\:album=audio:album
|
||||
xmpDM\:artist=audio:artist
|
||||
xmpDM\:composer=audio:composer
|
||||
xmpDM\:engineer=audio:engineer
|
||||
xmpDM\:genre=audio:genre
|
||||
xmpDM\:trackNumber=audio:trackNumber
|
||||
xmpDM\:releaseDate=audio:releaseDate
|
||||
#xmpDM:logComment
|
||||
xmpDM\:audioSampleRate=audio:sampleRate
|
||||
xmpDM\:audioSampleType=audio:sampleType
|
||||
xmpDM\:audioChannelType=audio:channelType
|
||||
xmpDM\:audioCompressor=audio:compressor
|
@@ -1,20 +0,0 @@
|
||||
#
|
||||
# TikaSpringConfiguredMetadataExtracter.properties - default mapping
|
||||
#
|
||||
# This is used to map from the Tika and standard namespaces
|
||||
# onto your content model. This is used for custom tika parsers,
|
||||
# but one file is used across all custom parsers.
|
||||
#
|
||||
# author: Nick Burch
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
created=cm:created
|
||||
|
||||
geo\:lat=cm:latitude
|
||||
geo\:long=cm:longitude
|
@@ -72,7 +72,8 @@ import org.springframework.context.ApplicationContext;
|
||||
org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest.class,
|
||||
org.alfresco.repo.content.transform.ArchiveContentTransformerTest.class,
|
||||
|
||||
// Metadata tests - replaced with simplified test in LocalRenditionTest and ServiceRenditionTest
|
||||
// Metadata tests - replaced with simplified tests in LocalRenditionTest and ServiceRenditionTest
|
||||
org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest.class,
|
||||
org.alfresco.repo.content.metadata.MappingMetadataExtracterTest.class,
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Repository
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.NodeService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.StringJoiner;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_DOD_5015_RECORD;
|
||||
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_RECORD;
|
||||
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.DOD_URI;
|
||||
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.RM_URI;
|
||||
import static org.alfresco.service.namespace.NamespaceService.CONTENT_MODEL_1_0_URI;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
/**
|
||||
* Test the ability of RFC822MetadataExtracter when overridden by RM, to control which properties are extracted
|
||||
* from T-Engines. RFC822MetadataExtracter no longer extracts.
|
||||
*
|
||||
* @author adavis
|
||||
*/
|
||||
//@RunWith(MockitoJUnitRunner.class)
|
||||
public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private RFC822MetadataExtracter extracter;
|
||||
private RFC822MetadataExtracter rmExtracter;
|
||||
@Mock private NodeService mockNodeService;
|
||||
|
||||
private NodeRef nodeRefWithDodRecord = new NodeRef("workspace://spacesStore/test-dod");
|
||||
private NodeRef nodeRefWithRecord = new NodeRef("workspace://spacesStore/test-rm");
|
||||
private NodeRef nodeRefWithBoth = new NodeRef("workspace://spacesStore/test-both");
|
||||
private NodeRef nodeRefWithNeither = new NodeRef("workspace://spacesStore/test-neither");
|
||||
|
||||
private static final QName MESSAGE_FROM_TEST_PROPERTY =
|
||||
QName.createQName("MessageToTest");
|
||||
private static final QName MESSAGE_TO_TEST_PROPERTY =
|
||||
QName.createQName("MessageFromTest");
|
||||
private static final QName MESSAGE_CC_TEST_PROPERTY =
|
||||
QName.createQName("MessageCCTest");
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
|
||||
extracter = (RFC822MetadataExtracter) ctx.getBean("extracter.RFC822");
|
||||
|
||||
MockitoAnnotations.initMocks(this);
|
||||
when(mockNodeService.hasAspect(nodeRefWithDodRecord, ASPECT_DOD_5015_RECORD)).thenReturn(true);
|
||||
when(mockNodeService.hasAspect(nodeRefWithRecord, ASPECT_RECORD)).thenReturn(true);
|
||||
when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_DOD_5015_RECORD)).thenReturn(true);
|
||||
when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_RECORD)).thenReturn(true);
|
||||
|
||||
rmExtracter = new RFC822MetadataExtracter()
|
||||
{
|
||||
@Override
|
||||
// Needed so the init method runs.
|
||||
protected Map<String, Set<QName>> getDefaultMapping()
|
||||
{
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
};
|
||||
rmExtracter.setNodeService(mockNodeService);
|
||||
rmExtracter.init();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
|
||||
{
|
||||
// ignore as this is no longer an extractor
|
||||
}
|
||||
|
||||
public void testMatch()
|
||||
{
|
||||
assertFalse("Normal class should never match", extracter.match(MimetypeMap.MIMETYPE_RFC822));
|
||||
assertTrue("RM class should match with correct type", rmExtracter.match(MimetypeMap.MIMETYPE_RFC822));
|
||||
assertFalse("RM class should not match with other types", rmExtracter.match(MimetypeMap.MIMETYPE_PDF));
|
||||
}
|
||||
|
||||
public void testGetExtractMapping()
|
||||
{
|
||||
Properties properties = new Properties();
|
||||
properties.put("namespace.prefix.rm", RM_URI);
|
||||
properties.put("namespace.prefix.dod", DOD_URI);
|
||||
properties.put("namespace.prefix.cm", CONTENT_MODEL_1_0_URI);
|
||||
properties.put("a", "cm:a");
|
||||
properties.put("b", "rm:b, dod:b");
|
||||
properties.put("c", "rm:c");
|
||||
properties.put("d", "cm:d, rm:d1, rm:d2");
|
||||
rmExtracter.setMappingProperties(properties);
|
||||
|
||||
assertEquals("No properties should have been removed", 7, countSystemProperties(nodeRefWithBoth));
|
||||
assertEquals("The 1 dod and 4 record properties should have been removed", 2, countSystemProperties(nodeRefWithNeither));
|
||||
assertEquals("The 4 record properties should have been removed", 3, countSystemProperties(nodeRefWithDodRecord));
|
||||
assertEquals("The 1 dod property should have been removed", 6, countSystemProperties(nodeRefWithRecord));
|
||||
|
||||
// Check that we have the fully qualified version as the T-Engine know nothing about the repo's prefixes.
|
||||
// Check just one of them.
|
||||
assertEquals("{http://www.alfresco.org/model/content/1.0}d, " +
|
||||
"{http://www.alfresco.org/model/content/1.0}a, " +
|
||||
"{http://www.alfresco.org/model/dod5015/1.0}b", getSystemProperties(nodeRefWithDodRecord));
|
||||
}
|
||||
|
||||
private int countSystemProperties(NodeRef nodeRef)
|
||||
{
|
||||
Map<String, Set<String>> extractMapping = rmExtracter.getExtractMapping(nodeRef);
|
||||
AtomicInteger count = new AtomicInteger();
|
||||
extractMapping.forEach((k,v) -> count.addAndGet(v.size()));
|
||||
return count.get();
|
||||
}
|
||||
|
||||
private String getSystemProperties(NodeRef nodeRef)
|
||||
{
|
||||
Map<String, Set<String>> extractMapping = rmExtracter.getExtractMapping(nodeRef);
|
||||
StringJoiner sj = new StringJoiner(", ");
|
||||
extractMapping.forEach((k,v) -> v.forEach(p -> sj.add(p.toString())));
|
||||
return sj.toString();
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user