REPO-5219 Allow AGS AMP to specify metadata extract mapping (REPO) (#169)

Added an optional extractMapping transform option to all metadata extractors to override the default one in the T-Engine.

In the case of the AGS AMP, it extends the RFC822MetadataExtracter with its own class to specify a different set of document to system mappings. The class in the repo no longer does extractions. Instead it is now used by the AsynchronousExtractor (which offloads extractions to T-Engines) to obtain the mappings, if the class has been extended, and these mappings are then passed to the T-Engine.
This commit is contained in:
Alan Davis
2020-11-19 17:04:52 +00:00
committed by GitHub
parent fc0fa4e4b7
commit 33f37731c7
19 changed files with 373 additions and 393 deletions

View File

@@ -89,6 +89,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
private static final String EMBED = "embed";
private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract";
private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed";
private static final String EXTRACT_MAPPING = "extractMapping";
private static final String METADATA = "metadata";
private static final Map<String, Serializable> EMPTY_METADATA = Collections.emptyMap();
@@ -102,6 +103,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
private TransactionService transactionService;
private TransformServiceRegistry transformServiceRegistry;
private TaggingService taggingService;
private List<MetadataExtractorPropertyMappingOverride> metadataExtractorPropertyMappingOverrides = Collections.emptyList();
public void setNodeService(NodeService nodeService)
{
@@ -143,6 +145,11 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
this.taggingService = taggingService;
}
/**
 * Sets the overrides that may supply an extract mapping to be sent to the T-Engine in place of its default one.
 *
 * @param metadataExtractorPropertyMappingOverrides the overrides to consult when building extract options.
 *        A {@code null} value is treated as an empty list.
 */
public void setMetadataExtractorPropertyMappingOverrides(List<MetadataExtractorPropertyMappingOverride> metadataExtractorPropertyMappingOverrides)
{
    // Keep the field non-null (matching its empty-list default) so getExtractOptions can always iterate over it.
    this.metadataExtractorPropertyMappingOverrides = metadataExtractorPropertyMappingOverrides == null
            ? Collections.emptyList()
            : metadataExtractorPropertyMappingOverrides;
}
@Override
protected Map<String, Set<QName>> getDefaultMapping()
{
@@ -223,7 +230,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
}
@Override
// Not called. Overloaded method with the NodeRef is called.
// Not called. extractRawInThread is called.
protected Map<String, Serializable> extractRaw(ContentReader reader)
{
return null;
@@ -233,12 +240,48 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
protected Map<String, Serializable> extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
throws Throwable
{
long timeoutMs = limits.getTimeoutMs();
Map<String, String> options = Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs));
Map<String, String> options = getExtractOptions(nodeRef, reader, limits);
transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, options);
return EMPTY_METADATA;
}
/**
 * Builds the transform options for a metadata extract request.
 *
 * The options always contain the timeout. If one of the configured
 * {@link MetadataExtractorPropertyMappingOverride}s matches the source mimetype, the first such override also
 * contributes an extract mapping (serialised as JSON).
 *
 * @param nodeRef of the node having its metadata extracted.
 * @param reader  supplies the source mimetype.
 * @param limits  supplies the timeout.
 * @return the options to pass with the extract request.
 */
private Map<String, String> getExtractOptions(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
{
    String timeout = Long.toString(limits.getTimeoutMs());

    // This is to allow the AGS (RM) AMP to specify the mapping of properties from the repository
    // rather than doing it out of process in the T-Engine.
    String sourceMimetype = reader.getMimetype();
    for (MetadataExtractorPropertyMappingOverride override : metadataExtractorPropertyMappingOverrides)
    {
        if (!override.match(sourceMimetype))
        {
            continue;
        }

        // Only the first matching override is consulted.
        Map<String, Set<String>> extractMapping = override.getExtractMapping(nodeRef);
        String extractMappingAsString = extractMappingToString(extractMapping);
        if (extractMappingAsString == null)
        {
            // Serialisation failed (extractMappingToString has logged it). Never put a null value into
            // the options; fall through to the timeout-only options instead.
            // NOTE(review): this means the T-Engine presumably applies its own default mapping - confirm
            // that is the desired fallback.
            break;
        }

        Map<String, String> options = new HashMap<>(2);
        options.put(TIMEOUT, timeout);
        options.put(EXTRACT_MAPPING, extractMappingAsString);
        return options;
    }
    return Collections.singletonMap(TIMEOUT, timeout);
}
/**
 * Serialises an extract mapping to JSON using {@code jsonObjectMapper}.
 *
 * @param map the mapping to serialise.
 * @return the JSON text, or {@code null} if serialisation failed (the failure is logged as an error).
 */
private String extractMappingToString(Map<String, Set<String>> map)
{
    String json = null;
    try
    {
        json = jsonObjectMapper.writeValueAsString(map);
    }
    catch (JsonProcessingException jsonFailure)
    {
        logger.error("Failed to save extractMapping as Json", jsonFailure);
    }
    return json;
}
@Override
protected void embedInternal(NodeRef nodeRef, Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer)
{

View File

@@ -0,0 +1,59 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.namespace.QName;
import java.util.Map;
import java.util.Set;
/**
* Supplies an extract mapping that is passed to the T-Engine to override the mapping it would otherwise use.
* Callers first ask {@link #match(String)} whether an override applies to a source mimetype and, if so,
* obtain the mapping for a particular node from {@link #getExtractMapping(NodeRef)}.
*
* @deprecated as code running inside the content repository process that overrides metadata extract properties should
* be moved out of process to reduce coupling of components, making upgrade simpler.
*
* @author adavis
*/
@Deprecated
public interface MetadataExtractorPropertyMappingOverride
{
/**
* Indicates if the {@link #getExtractMapping(NodeRef)} will provide extract properties
* to override those in the T-Engine.
*
* @param sourceMimetype of the node.
* @return {@code true} if there will be override extract properties.
*/
boolean match(String sourceMimetype);
/**
* Returns the extract mapping to be passed to the T-Engine.
*
* @param nodeRef of the node having its metadata extracted.
* @return the mapping of document properties (keys) to system properties (values).
*/
Map<String, Set<String>> getExtractMapping(NodeRef nodeRef);
}

View File

@@ -25,33 +25,33 @@
*/
package org.alfresco.repo.content.metadata;
import java.io.IOException;
import java.io.InputStream;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import javax.mail.Header;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Date;
import java.util.Enumeration;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.mail.Header;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeUtility;
import javax.mail.internet.MimeMessage.RecipientType;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
/**
* @deprecated OOTB extractors are being moved to T-Engines.
* @deprecated OOTB extractors have been moved to T-Engines.
*
* Metadata extractor for RFC822 mime emails.
* This class originally provided metadata extraction of RFC822 mimetype emails. It will no longer be used for that
* purpose as that work has been off loaded to a T-Engine via the AsynchronousExtractor. It still exists because the
* governance services (RM) AMP overrides it to provide alternate property mappings and to filter out some of
* these properties if the node does not have the "record" or "dod5015record" aspects.<p>
*
* Default configuration: (see RFC822MetadataExtractor.properties)
* We also still have the default configuration file (RFC822MetadataExtracter.properties), which contains the
* default set of properties, which may be manipulated by RM.
*
* <pre>
* <b>messageFrom:</b> -- imap:messageFrom, cm:originator
@@ -65,143 +65,49 @@ import org.alfresco.service.namespace.QName;
* <b>Message-ID:</b> -- imap:messageId
* </pre>
*
* @author Derek Hulley
* @since 3.2
* This class now provides an alternative property mapping in the request to the T-Engine. Unlike the previous
* implementation the filtering of properties takes place before rather than after the extraction. This is done in
* this class making the code within the org.alfresco.module.org_alfresco_module_rm.email.RFC822MetadataExtracter
* filterSystemProperties method redundant.
*
* @author adavis
*/
@Deprecated
public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
implements MetadataExtractorPropertyMappingOverride
{
static String RM_URI = "http://www.alfresco.org/model/recordsmanagement/1.0";
static String DOD_URI = "http://www.alfresco.org/model/dod5015/1.0";
protected static final String KEY_MESSAGE_FROM = "messageFrom";
protected static final String KEY_MESSAGE_TO = "messageTo";
protected static final String KEY_MESSAGE_CC = "messageCc";
protected static final String KEY_MESSAGE_SUBJECT = "messageSubject";
protected static final String KEY_MESSAGE_SENT = "messageSent";
protected static final String KEY_MESSAGE_RECEIVED = "messageReceived";
static final String RECORD = "record";
static final String DOD_5015_RECORD = "dod5015record";
public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_RFC822 };
static final QName ASPECT_RECORD = QName.createQName(RM_URI, RECORD);
static final QName ASPECT_DOD_5015_RECORD = QName.createQName(DOD_URI, DOD_5015_RECORD);
private static Log logger = LogFactory.getLog(RFC822MetadataExtracter.class);
private static final HashSet<String> SUPPORTED_MIMETYPES =
new HashSet<>(Arrays.asList(new String[] { MimetypeMap.MIMETYPE_RFC822 }));
public RFC822MetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
super(SUPPORTED_MIMETYPES);
}
private NodeService nodeService;
/**
* Sets the {@link NodeService} that {@link #getExtractMapping(NodeRef)} uses to check which aspects a node has.
*
* @param nodeService the node service to use.
*/
public void setNodeService(NodeService nodeService)
{
this.nodeService = nodeService;
}
@Override
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
Map<String, Serializable> rawProperties = newRawMap();
InputStream is = null;
try
{
is = reader.getContentInputStream();
MimeMessage mimeMessage = new MimeMessage(null, is);
if (mimeMessage != null)
{
/**
* Extract RFC822 values that doesn't match to headers and need to be encoded.
* Or those special fields that require some code to extract data
*/
String tmp = InternetAddress.toString(mimeMessage.getFrom());
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties);
tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO));
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
putRawValue(KEY_MESSAGE_TO, tmp, rawProperties);
tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC));
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
putRawValue(KEY_MESSAGE_CC, tmp, rawProperties);
putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);
/**
* Received field from RFC 822
*
* "Received" ":" ; one per relay
* ["from" domain] ; sending host
* ["by" domain] ; receiving host
* ["via" atom] ; physical path
* ("with" atom) ; link/mail protocol
* ["id" msg-id] ; receiver msg id
* ["for" addr-spec] ; initial form
* ";" date-time ; time received
*/
Date rxDate = mimeMessage.getReceivedDate();
if(rxDate != null)
{
// The email implementation extracted the received date for us.
putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
}
else
{
// the email implementation did not parse the received date for us.
String[] rx = mimeMessage.getHeader("received");
if(rx != null && rx.length > 0)
{
String lastReceived = rx[0];
lastReceived = MimeUtility.unfold(lastReceived);
int x = lastReceived.lastIndexOf(';');
if(x > 0)
{
String dateStr = lastReceived.substring(x + 1).trim();
putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
}
}
}
String[] subj = mimeMessage.getHeader("Subject");
if (subj != null && subj.length > 0)
{
String decodedSubject = subj[0];
try
{
decodedSubject = MimeUtility.decodeText(decodedSubject);
}
catch (UnsupportedEncodingException e)
{
logger.warn(e.toString());
}
putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
}
/*
* Extract values from all header fields, including extension fields "X-"
*/
Set<String> keys = getMapping().keySet();
@SuppressWarnings("unchecked")
Enumeration<Header> headers = mimeMessage.getAllHeaders();
while (headers.hasMoreElements())
{
Header header = (Header) headers.nextElement();
if (keys.contains(header.getName()))
{
tmp = header.getValue();
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
putRawValue(header.getName(), tmp, rawProperties);
}
}
}
}
finally
{
if (is != null)
{
try
{
is.close();
}
catch (IOException e)
{
}
}
}
// Done
return rawProperties;
logger.error("RFC822MetadataExtracter.extractRaw should not have been called, " +
"as the extraction should have taken place in a T-Engine.");
return Collections.emptyMap(); // will result in no updates.
}
/**
@@ -212,4 +118,46 @@ public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
{
return super.getMapping();
}
/**
 * Reports whether this instance supplies an overriding extract mapping for the given mimetype.
 *
 * @param sourceMimetype of the node.
 * @return {@code true} only for a supported mimetype when this object is a subclass of
 *         {@code RFC822MetadataExtracter} rather than the base class itself.
 */
@Override
public boolean match(String sourceMimetype)
{
    if (!SUPPORTED_MIMETYPES.contains(sourceMimetype))
    {
        return false;
    }
    // When RM overrides the "extracter.RFC822" bean with its own class 'this' will be a sub class.
    boolean extendedByAnotherClass = getClass() != RFC822MetadataExtracter.class;
    return extendedByAnotherClass;
}
/**
 * Builds the extract mapping for a node from the configured mapping, only including system properties
 * depending on the RM / DOD aspects on the node.
 *
 * System properties in the RM namespace are kept only if the node has the record aspect, those in the DOD
 * namespace only if it has the DOD 5015 record aspect; properties in any other namespace are always kept.
 * Document properties left with no system properties are omitted.
 *
 * @param nodeRef of the node having its metadata extracted.
 * @return the filtered mapping of document properties to system properties (as {@code QName.toString()} values).
 */
@Override
public Map<String, Set<String>> getExtractMapping(NodeRef nodeRef)
{
    Map<String, Set<QName>> configured = getMapping();
    HashMap<String, Set<String>> filtered = new HashMap<>(configured.size());

    boolean nodeIsRecord = nodeService.hasAspect(nodeRef, ASPECT_RECORD);
    boolean nodeIsDodRecord = nodeService.hasAspect(nodeRef, ASPECT_DOD_5015_RECORD);

    for (Map.Entry<String, Set<QName>> mappingEntry : configured.entrySet())
    {
        Set<QName> candidates = mappingEntry.getValue();
        HashSet<String> kept = new HashSet<>(candidates.size());

        for (QName candidate : candidates)
        {
            String namespace = candidate.getNamespaceURI();
            boolean inRmNamespace = RM_URI.equals(namespace);
            boolean inDodNamespace = DOD_URI.equals(namespace);

            boolean include;
            if (inRmNamespace || inDodNamespace)
            {
                // RM / DOD properties are only wanted when the node carries the matching aspect.
                include = (inRmNamespace && nodeIsRecord) || (inDodNamespace && nodeIsDodRecord);
            }
            else
            {
                include = true;
            }

            if (!include)
            {
                continue;
            }
            kept.add(candidate.toString());
        }

        if (kept.isEmpty())
        {
            continue;
        }
        filtered.put(mappingEntry.getKey(), kept);
    }
    return filtered;
}
}

View File

@@ -299,8 +299,25 @@
<property name="transactionService" ref="transactionService" />
<property name="transformServiceRegistry" ref="transformServiceRegistry" />
<property name="taggingService" ref="taggingService" />
<property name="metadataExtractorPropertyMappingOverrides">
<list>
<ref bean="extracter.RFC822" /> <!-- The RM AMP overrides this bean, extending the base class -->
</list>
</property>
</bean>
<!-- No longer used as an extractor but still extended by RM to provide additional mappings -->
<bean id="extracter.RFC822" class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter" parent="baseMetadataExtracter" >
<property name="nodeService" ref="nodeService"/>
<property name="supportedDateFormats">
<list>
<value>EEE, d MMM yyyy HH:mm:ss Z</value>
<value>EEE, d MMM yy HH:mm:ss Z</value>
<value>d MMM yyyy HH:mm:ss Z</value>
</list>
</property>
</bean>
<!-- Content Transformation Registry -->
<bean id="contentTransformerRegistry" class="org.alfresco.repo.content.transform.ContentTransformerRegistry" >
<constructor-arg>

View File

@@ -1,12 +0,0 @@
#
# DWGMetadataExtracter - default mapping
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -1,12 +0,0 @@
#
# HtmlMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -1,12 +0,0 @@
#
# JodConverterMetadataExtracter - default mapping
#
# author: Neil McErlean
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -1,30 +0,0 @@
#
# MP3MetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -1,14 +0,0 @@
#
# MailMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
sentDate=cm:sentdate
originator=cm:originator, cm:author
addressee=cm:addressee
addressees=cm:addressees
subjectLine=cm:subjectline, cm:description

View File

@@ -1,14 +0,0 @@
#
# OfficeMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
createDateTime=cm:created
lastSaveDateTime=cm:modified

View File

@@ -1,21 +0,0 @@
#
# OpenDocumentMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title

View File

@@ -1,13 +0,0 @@
#
# PdfBoxMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
created=cm:created

View File

@@ -1,13 +0,0 @@
#
# PoiMetadataExtracter - default mapping
#
# author: Neil McErlean
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created

View File

@@ -1,34 +0,0 @@
#
# TikaAudioMetadataExtracter - audio mapping
#
# This is used to map from the Tika audio metadata onto your
# content model. This will be used for any Audio content
# for which an explicit extractor isn't defined
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -1,52 +0,0 @@
#
# TikaAutoMetadataExtracter - default mapping
#
# This is used to map from the Tika and standard namespaces
# onto your content model. This will be used for any
# content for which an explicit extractor isn't defined,
# by using Tika's auto-selection facilities.
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -1,20 +0,0 @@
#
# TikaSpringConfiguredMetadataExtracter.properties - default mapping
#
# This is used to map from the Tika and standard namespaces
# onto your content model. This is used for custom tika parsers,
# but one file is used across all custom parsers.
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude

View File

@@ -72,7 +72,8 @@ import org.springframework.context.ApplicationContext;
org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest.class,
org.alfresco.repo.content.transform.ArchiveContentTransformerTest.class,
// Metadata tests - replaced with simplified test in LocalRenditionTest and ServiceRenditionTest
// Metadata tests - replaced with simplified tests in LocalRenditionTest and ServiceRenditionTest
org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest.class,
org.alfresco.repo.content.metadata.MappingMetadataExtracterTest.class,
// ----------------------------------------------------------------------

View File

@@ -0,0 +1,159 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.namespace.QName;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import java.io.Serializable;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicInteger;
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_DOD_5015_RECORD;
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_RECORD;
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.DOD_URI;
import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.RM_URI;
import static org.alfresco.service.namespace.NamespaceService.CONTENT_MODEL_1_0_URI;
import static org.mockito.Mockito.when;
/**
 * Test the ability of RFC822MetadataExtracter when overridden by RM, to control which properties are extracted
 * from T-Engines. RFC822MetadataExtracter no longer extracts.
 *
 * @author adavis
 */
public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest
{
    /** The bean as configured in the application context (the base class, so it should never match). */
    private RFC822MetadataExtracter extracter;

    /** An anonymous subclass standing in for the class RM uses to override the bean. */
    private RFC822MetadataExtracter rmExtracter;

    // Initialised by MockitoAnnotations.initMocks(this) in setUp() rather than by a Mockito JUnit runner.
    @Mock private NodeService mockNodeService;

    private final NodeRef nodeRefWithDodRecord = new NodeRef("workspace://spacesStore/test-dod");
    private final NodeRef nodeRefWithRecord = new NodeRef("workspace://spacesStore/test-rm");
    private final NodeRef nodeRefWithBoth = new NodeRef("workspace://spacesStore/test-both");
    private final NodeRef nodeRefWithNeither = new NodeRef("workspace://spacesStore/test-neither");

    @Override
    public void setUp() throws Exception
    {
        super.setUp();
        extracter = (RFC822MetadataExtracter) ctx.getBean("extracter.RFC822");

        MockitoAnnotations.initMocks(this);
        // Any hasAspect call that is not stubbed here returns false (Mockito's default for boolean).
        when(mockNodeService.hasAspect(nodeRefWithDodRecord, ASPECT_DOD_5015_RECORD)).thenReturn(true);
        when(mockNodeService.hasAspect(nodeRefWithRecord, ASPECT_RECORD)).thenReturn(true);
        when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_DOD_5015_RECORD)).thenReturn(true);
        when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_RECORD)).thenReturn(true);

        rmExtracter = new RFC822MetadataExtracter()
        {
            @Override
            // Needed so the init method runs.
            protected Map<String, Set<QName>> getDefaultMapping()
            {
                return Collections.emptyMap();
            }
        };
        rmExtracter.setNodeService(mockNodeService);
        rmExtracter.init();
    }

    @Override
    protected MetadataExtracter getExtracter()
    {
        return extracter;
    }

    @Override
    protected void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
    {
        // ignore as this is no longer an extractor
    }

    public void testMatch()
    {
        assertFalse("Normal class should never match", extracter.match(MimetypeMap.MIMETYPE_RFC822));
        assertTrue("RM class should match with correct type", rmExtracter.match(MimetypeMap.MIMETYPE_RFC822));
        assertFalse("RM class should not match with other types", rmExtracter.match(MimetypeMap.MIMETYPE_PDF));
    }

    public void testGetExtractMapping()
    {
        Properties properties = new Properties();
        properties.put("namespace.prefix.rm", RM_URI);
        properties.put("namespace.prefix.dod", DOD_URI);
        properties.put("namespace.prefix.cm", CONTENT_MODEL_1_0_URI);
        properties.put("a", "cm:a");
        properties.put("b", "rm:b, dod:b");
        properties.put("c", "rm:c");
        properties.put("d", "cm:d, rm:d1, rm:d2");
        rmExtracter.setMappingProperties(properties);

        assertEquals("No properties should have been removed", 7, countSystemProperties(nodeRefWithBoth));
        assertEquals("The 1 dod and 4 record properties should have been removed", 2, countSystemProperties(nodeRefWithNeither));
        assertEquals("The 4 record properties should have been removed", 3, countSystemProperties(nodeRefWithDodRecord));
        assertEquals("The 1 dod property should have been removed", 6, countSystemProperties(nodeRefWithRecord));

        // Check that we have the fully qualified version as the T-Engine knows nothing about the repo's prefixes.
        // Check just one of them. The values are sorted by getSystemProperties, so the expectation does not
        // depend on the iteration order of the hash based collections in the mapping.
        assertEquals("{http://www.alfresco.org/model/content/1.0}a, " +
                     "{http://www.alfresco.org/model/content/1.0}d, " +
                     "{http://www.alfresco.org/model/dod5015/1.0}b", getSystemProperties(nodeRefWithDodRecord));
    }

    /**
     * Counts the system properties, across all document properties, in the extract mapping for the node.
     */
    private int countSystemProperties(NodeRef nodeRef)
    {
        Map<String, Set<String>> extractMapping = rmExtracter.getExtractMapping(nodeRef);
        AtomicInteger count = new AtomicInteger();
        extractMapping.forEach((k,v) -> count.addAndGet(v.size()));
        return count.get();
    }

    /**
     * Returns all system properties in the extract mapping for the node, sorted and joined with ", ".
     */
    private String getSystemProperties(NodeRef nodeRef)
    {
        Map<String, Set<String>> extractMapping = rmExtracter.getExtractMapping(nodeRef);
        StringJoiner sj = new StringJoiner(", ");
        extractMapping.values().stream()
                .flatMap(Set::stream)
                .sorted()
                .forEach(sj::add);
        return sj.toString();
    }
}