From 33f37731c7f282a03f7d9557c73b6157c25a5498 Mon Sep 17 00:00:00 2001 From: Alan Davis Date: Thu, 19 Nov 2020 17:04:52 +0000 Subject: [PATCH] REPO-5219 Allow AGS AMP to specify metadata extract mapping (REPO) (#169) Added an optional extractMapping transform option to all metadata extractors to override the default one in the T-Engine. In the case of the AGS AMP, it extends the RFC822MetadataExtracter with its own class to specify a different set of document to system mappings. The class in the repo no longer does extractions, but is now used by the AsynchronousExtractor (which offloads extractions to T-Engines) to obtain the mappings, if it has been extended, which are then passed to the T-Engine. --- l10n.properties | 2 +- .../metadata/AsynchronousExtractor.java | 49 +++- ...adataExtractorPropertyMappingOverride.java | 59 +++++ .../metadata/RFC822MetadataExtracter.java | 230 +++++++----------- .../alfresco/content-services-context.xml | 17 ++ .../metadata/DWGMetadataExtracter.properties | 12 - .../metadata/HtmlMetadataExtracter.properties | 12 - .../JodConverterMetadataExtracter.properties | 12 - .../metadata/MP3MetadataExtracter.properties | 30 --- .../metadata/MailMetadataExtracter.properties | 14 -- .../OfficeMetadataExtracter.properties | 14 -- .../OpenDocumentMetadataExtracter.properties | 21 -- .../PdfBoxMetadataExtracter.properties | 13 - .../metadata/PoiMetadataExtracter.properties | 13 - .../TikaAudioMetadataExtracter.properties | 34 --- .../TikaAutoMetadataExtracter.properties | 52 ---- ...ringConfiguredMetadataExtracter.properties | 20 -- .../org/alfresco/MiscContextTestSuite.java | 3 +- .../metadata/RFC822MetadataExtracterTest.java | 159 ++++++++++++ 19 files changed, 373 insertions(+), 393 deletions(-) create mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtractorPropertyMappingOverride.java delete mode 100644 repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties delete mode 100644 
repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/OpenDocumentMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties delete mode 100644 repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties create mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java diff --git a/l10n.properties b/l10n.properties index 807adc7fd8..3ec4175c35 100644 --- a/l10n.properties +++ b/l10n.properties @@ -2,4 +2,4 @@ MESSAGE_SEARCH_PATH="data-model/src/main/resources/alfresco/messages/dictionary-model*.properties remote-api/src/main/resources/alfresco/messages/admin-console*.properties remote-api/src/main/resources/alfresco/messages/custommodel-restapi-messages*.properties remote-api/src/main/resources/alfresco/messages/rest-framework-messages*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/admin/admin-communitysummary.get*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/admin/consoles/admin-repoconsole.get*.properties 
remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/admin/consoles/admin-tenantconsole.get*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/admin/consoles/admin-workflowconsole.get*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/admin/support-tools/admin-nodebrowser.get*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/audit/entry*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/blogs/post/blog-post.delete*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/discussions/posts/forum-post.delete*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/links/links-delete.post*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/links/links.post*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/links/links.put*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/person/user-csv-upload.post*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/slingshot/calendar/event.get*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/slingshot/calendar/event.post*.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/slingshot/calendar/event.put*.properties repository/src/main/resources/alfresco/messages/action-config*.properties repository/src/main/resources/alfresco/messages/action-service*.properties repository/src/main/resources/alfresco/messages/activiti-engine-messages*.properties repository/src/main/resources/alfresco/messages/activities-service*.properties repository/src/main/resources/alfresco/messages/activity-list*.properties 
repository/src/main/resources/alfresco/messages/application-model*.properties repository/src/main/resources/alfresco/messages/authentication*.properties repository/src/main/resources/alfresco/messages/bootstrap-content-template-examples*.properties repository/src/main/resources/alfresco/messages/bootstrap-example-javascripts*.properties repository/src/main/resources/alfresco/messages/bootstrap-example-smartfoldertemplates*.properties repository/src/main/resources/alfresco/messages/bootstrap-imapScripts*.properties repository/src/main/resources/alfresco/messages/bootstrap-javascripts*.properties repository/src/main/resources/alfresco/messages/bootstrap-messages*.properties repository/src/main/resources/alfresco/messages/bootstrap-readme-template*.properties repository/src/main/resources/alfresco/messages/bootstrap-spaces*.properties repository/src/main/resources/alfresco/messages/bootstrap-templates*.properties repository/src/main/resources/alfresco/messages/bootstrap-tutorial*.properties repository/src/main/resources/alfresco/messages/bootstrap-webScripts*.properties repository/src/main/resources/alfresco/messages/bootstrap-webScriptsExtensions*.properties repository/src/main/resources/alfresco/messages/bpm-messages*.properties repository/src/main/resources/alfresco/messages/categories*.properties repository/src/main/resources/alfresco/messages/coci-service*.properties repository/src/main/resources/alfresco/messages/content-filter-languages*.properties repository/src/main/resources/alfresco/messages/content-model*.properties repository/src/main/resources/alfresco/messages/copy-service*.properties repository/src/main/resources/alfresco/messages/custommodel-service*.properties repository/src/main/resources/alfresco/messages/discussion-messages*.properties repository/src/main/resources/alfresco/messages/distributionpolicies-model*.properties repository/src/main/resources/alfresco/messages/doclink-service*.properties 
repository/src/main/resources/alfresco/messages/download-model*.properties repository/src/main/resources/alfresco/messages/email-server-model*.properties repository/src/main/resources/alfresco/messages/email-service*.properties repository/src/main/resources/alfresco/messages/file-folder-service*.properties repository/src/main/resources/alfresco/messages/form-service*.properties repository/src/main/resources/alfresco/messages/forum-model*.properties repository/src/main/resources/alfresco/messages/imap-service*.properties repository/src/main/resources/alfresco/messages/initiate-inplace*.properties repository/src/main/resources/alfresco/messages/invitation-service*.properties repository/src/main/resources/alfresco/messages/lock-service*.properties repository/src/main/resources/alfresco/messages/notification-service*.properties repository/src/main/resources/alfresco/messages/period-provider*.properties repository/src/main/resources/alfresco/messages/permissions-service*.properties repository/src/main/resources/alfresco/messages/quickshare-service*.properties repository/src/main/resources/alfresco/messages/rendition-config*.properties repository/src/main/resources/alfresco/messages/replication*.properties repository/src/main/resources/alfresco/messages/repoadmin-service*.properties repository/src/main/resources/alfresco/messages/reset-password-messages*.properties repository/src/main/resources/alfresco/messages/rule-config*.properties repository/src/main/resources/alfresco/messages/site-model*.properties repository/src/main/resources/alfresco/messages/site-service*.properties repository/src/main/resources/alfresco/messages/slingshot*.properties repository/src/main/resources/alfresco/messages/smartfolder-model*.properties repository/src/main/resources/alfresco/messages/subscription-service*.properties repository/src/main/resources/alfresco/messages/system-messages*.properties repository/src/main/resources/alfresco/messages/system-model*.properties 
repository/src/main/resources/alfresco/messages/template-service*.properties repository/src/main/resources/alfresco/messages/templates-messages*.properties repository/src/main/resources/alfresco/messages/transfer-model*.properties repository/src/main/resources/alfresco/messages/transfer-service*.properties repository/src/main/resources/alfresco/messages/ui-inplace*.properties repository/src/main/resources/alfresco/messages/webdav-messages*.properties repository/src/main/resources/alfresco/messages/workflow-package-messages*.properties repository/src/main/resources/alfresco/workflow/invitation-moderated-workflow-messages*.properties repository/src/main/resources/alfresco/workflow/invitation-nominated-workflow-messages*.properties repository/src/main/resources/alfresco/workflow/workflow-messages*.properties" -EXCLUDED_FILES="data-model/src/main/resources/alfresco/messages/dictionary-messages.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_cs.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_da.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_de.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_el.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_en.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_es.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_fi.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_fr.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_it.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_ja.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_ko.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_nl.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_no.properties 
data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_pt.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_pt_BR.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_ru.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_sv.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_th.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_zh.properties remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/audit/control.properties repository/src/main/resources/alfresco/messages/content-service.properties repository/src/main/resources/alfresco/messages/module-messages.properties repository/src/main/resources/alfresco/messages/patch-service.properties repository/src/main/resources/alfresco/messages/repoadmin-interpreter-help.properties repository/src/main/resources/alfresco/messages/schema-update.properties repository/src/main/resources/alfresco/messages/tenant-interpreter-help.properties repository/src/main/resources/alfresco/messages/version-service.properties repository/src/main/resources/alfresco/messages/workflow-interpreter-help.properties repository/src/main/resources/alfresco/alfresco-shared.properties repository/src/main/resources/alfresco/caches.properties repository/src/main/resources/alfresco/repository.properties repository/src/main/resources/alfresco/client/config/repo-clients-apps.properties repository/src/main/resources/alfresco/domain/cache-strategies.properties repository/src/main/resources/alfresco/domain/hibernate-cfg.properties repository/src/main/resources/alfresco/domain/quartz.properties repository/src/main/resources/alfresco/domain/transaction.properties repository/src/main/resources/alfresco/keystore/keystore-passwords.properties repository/src/main/resources/alfresco/keystore/ssl-keystore-passwords.properties repository/src/main/resources/alfresco/keystore/ssl-truststore-passwords.properties 
repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/RFC822MetadataExtracter.properties repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties repository/src/main/resources/alfresco/subsystems/ActivitiesFeed/default/activities-jobs.properties repository/src/main/resources/alfresco/subsystems/Authentication/alfrescoNtlm/alfresco-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/external/external-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/kerberos/kerberos-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/ldap/ldap-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/ldap-ad/ldap-ad-authentication.properties repository/src/main/resources/alfresco/subsystems/email/InboundSMTP/inboundSMTP.properties repository/src/main/resources/alfresco/subsystems/email/OutboundSMTP/outboundSMTP.properties repository/src/main/resources/alfresco/subsystems/fileServers/default/file-servers.properties repository/src/main/resources/alfresco/subsystems/imap/default/imap-server.properties 
repository/src/main/resources/alfresco/subsystems/Replication/default/replication.properties repository/src/main/resources/alfresco/subsystems/Search/noindex/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/noindex/noindex-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr/solr-backup.properties repository/src/main/resources/alfresco/subsystems/Search/solr/solr-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr/facet/solr-facets-config.properties repository/src/main/resources/alfresco/subsystems/Search/solr4/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr4/solr-backup.properties repository/src/main/resources/alfresco/subsystems/Search/solr4/solr-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr6/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr6/solr-backup.properties repository/src/main/resources/alfresco/subsystems/Search/solr6/solr-search.properties repository/src/main/resources/alfresco/subsystems/Subscriptions/default/subscription-service.properties repository/src/main/resources/alfresco/subsystems/Synchronization/default/default-synchronization.properties repository/src/main/resources/alfresco/subsystems/sysAdmin/default/sysadmin-parameter.properties repository/src/main/resources/alfresco/subsystems/thirdparty/default/alfresco-pdf-renderer-transform.properties repository/src/main/resources/alfresco/subsystems/thirdparty/default/imagemagick-transform.properties repository/src/main/resources/alfresco/subsystems/Transformers/default/transformers.properties repository/src/main/resources/org/alfresco/encryption/keystore-parameters.properties repository/src/main/resources/org/alfresco/repo/i18n/testMessages.properties 
repository/src/main/resources/org/alfresco/repo/module/tool/default-file-mapping.properties repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties repository/src/main/resources/alfresco/subsystems/OOoJodconverter/default/jodconverter.properties" +EXCLUDED_FILES="data-model/src/main/resources/alfresco/messages/dictionary-messages.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_cs.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_da.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_de.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_el.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_en.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_es.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_fi.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_fr.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_it.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_ja.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_ko.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_nl.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_no.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_pt.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_pt_BR.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_ru.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_sv.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_th.properties data-model/src/main/resources/alfresco/model/dataTypeAnalyzers_zh.properties 
remote-api/src/main/resources/alfresco/templates/webscripts/org/alfresco/repository/audit/control.properties repository/src/main/resources/alfresco/messages/content-service.properties repository/src/main/resources/alfresco/messages/module-messages.properties repository/src/main/resources/alfresco/messages/patch-service.properties repository/src/main/resources/alfresco/messages/repoadmin-interpreter-help.properties repository/src/main/resources/alfresco/messages/schema-update.properties repository/src/main/resources/alfresco/messages/tenant-interpreter-help.properties repository/src/main/resources/alfresco/messages/version-service.properties repository/src/main/resources/alfresco/messages/workflow-interpreter-help.properties repository/src/main/resources/alfresco/alfresco-shared.properties repository/src/main/resources/alfresco/caches.properties repository/src/main/resources/alfresco/repository.properties repository/src/main/resources/alfresco/client/config/repo-clients-apps.properties repository/src/main/resources/alfresco/domain/cache-strategies.properties repository/src/main/resources/alfresco/domain/hibernate-cfg.properties repository/src/main/resources/alfresco/domain/quartz.properties repository/src/main/resources/alfresco/domain/transaction.properties repository/src/main/resources/alfresco/keystore/keystore-passwords.properties repository/src/main/resources/alfresco/keystore/ssl-keystore-passwords.properties repository/src/main/resources/alfresco/keystore/ssl-truststore-passwords.properties repository/src/main/resources/alfresco/metadata/RFC822MetadataExtracter.properties repository/src/main/resources/alfresco/subsystems/ActivitiesFeed/default/activities-jobs.properties repository/src/main/resources/alfresco/subsystems/Authentication/alfrescoNtlm/alfresco-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/external/external-authentication.properties 
repository/src/main/resources/alfresco/subsystems/Authentication/kerberos/kerberos-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/ldap/ldap-authentication.properties repository/src/main/resources/alfresco/subsystems/Authentication/ldap-ad/ldap-ad-authentication.properties repository/src/main/resources/alfresco/subsystems/email/InboundSMTP/inboundSMTP.properties repository/src/main/resources/alfresco/subsystems/email/OutboundSMTP/outboundSMTP.properties repository/src/main/resources/alfresco/subsystems/fileServers/default/file-servers.properties repository/src/main/resources/alfresco/subsystems/imap/default/imap-server.properties repository/src/main/resources/alfresco/subsystems/Replication/default/replication.properties repository/src/main/resources/alfresco/subsystems/Search/noindex/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/noindex/noindex-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr/solr-backup.properties repository/src/main/resources/alfresco/subsystems/Search/solr/solr-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr/facet/solr-facets-config.properties repository/src/main/resources/alfresco/subsystems/Search/solr4/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr4/solr-backup.properties repository/src/main/resources/alfresco/subsystems/Search/solr4/solr-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr6/common-search.properties repository/src/main/resources/alfresco/subsystems/Search/solr6/solr-backup.properties repository/src/main/resources/alfresco/subsystems/Search/solr6/solr-search.properties repository/src/main/resources/alfresco/subsystems/Subscriptions/default/subscription-service.properties 
repository/src/main/resources/alfresco/subsystems/Synchronization/default/default-synchronization.properties repository/src/main/resources/alfresco/subsystems/sysAdmin/default/sysadmin-parameter.properties repository/src/main/resources/alfresco/subsystems/thirdparty/default/alfresco-pdf-renderer-transform.properties repository/src/main/resources/alfresco/subsystems/thirdparty/default/imagemagick-transform.properties repository/src/main/resources/alfresco/subsystems/Transformers/default/transformers.properties repository/src/main/resources/org/alfresco/encryption/keystore-parameters.properties repository/src/main/resources/org/alfresco/repo/i18n/testMessages.properties repository/src/main/resources/org/alfresco/repo/module/tool/default-file-mapping.properties repository/src/main/resources/alfresco/subsystems/OOoJodconverter/default/jodconverter.properties" diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java index efce5ae2d7..bafc147676 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java @@ -89,6 +89,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter private static final String EMBED = "embed"; private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract"; private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed"; + private static final String EXTRACT_MAPPING = "extractMapping"; private static final String METADATA = "metadata"; private static final Map EMPTY_METADATA = Collections.emptyMap(); @@ -102,6 +103,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter private TransactionService transactionService; private TransformServiceRegistry transformServiceRegistry; private TaggingService 
taggingService; + private List metadataExtractorPropertyMappingOverrides = Collections.emptyList(); public void setNodeService(NodeService nodeService) { @@ -143,6 +145,11 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter this.taggingService = taggingService; } + public void setMetadataExtractorPropertyMappingOverrides(List metadataExtractorPropertyMappingOverrides) + { + this.metadataExtractorPropertyMappingOverrides = metadataExtractorPropertyMappingOverrides; + } + @Override protected Map> getDefaultMapping() { @@ -223,7 +230,7 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter } @Override - // Not called. Overloaded method with the NodeRef is called. + // Not called. extractRawInThread is called. protected Map extractRaw(ContentReader reader) { return null; @@ -233,12 +240,48 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter protected Map extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits) throws Throwable { - long timeoutMs = limits.getTimeoutMs(); - Map options = Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs)); + Map options = getExtractOptions(nodeRef, reader, limits); transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, options); return EMPTY_METADATA; } + private Map getExtractOptions(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits) + { + long timeoutMs = limits.getTimeoutMs(); + + // This is to allow the AGS (RM) AMP to specify the mapping of properties from the repository + // rather than doing it out of process in the T-Engine. 
+ String sourceMimetype = reader.getMimetype(); + for (MetadataExtractorPropertyMappingOverride override : metadataExtractorPropertyMappingOverrides) + { + if (override.match(sourceMimetype)) + { + Map> extractMapping = override.getExtractMapping(nodeRef); + String extractMappingAsString = extractMappingToString(extractMapping); + + Map options = new HashMap<>(2); + options.put(TIMEOUT, Long.toString(timeoutMs)); + options.put(EXTRACT_MAPPING, extractMappingAsString); + return options; + } + } + + return Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs)); + } + + private String extractMappingToString(Map> map) + { + try + { + return jsonObjectMapper.writeValueAsString(map); + } + catch (JsonProcessingException e) + { + logger.error("Failed to save extractMapping as Json", e); + return null; + } + } + @Override protected void embedInternal(NodeRef nodeRef, Map metadata, ContentReader reader, ContentWriter writer) { diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtractorPropertyMappingOverride.java b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtractorPropertyMappingOverride.java new file mode 100644 index 0000000000..85f37c25d6 --- /dev/null +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtractorPropertyMappingOverride.java @@ -0,0 +1,59 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. + * #L% + */ +package org.alfresco.repo.content.metadata; + +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.namespace.QName; + +import java.util.Map; +import java.util.Set; + +/** + * @deprecated as code running inside the content repository process that overrides metadata extract properties should + * be moved out of process to reduce coupling of components, making upgrade simpler. + * + * @author adavis + */ +@Deprecated +public interface MetadataExtractorPropertyMappingOverride +{ + /** + * Indicates if the {@link #getExtractMapping(NodeRef)} will provide extract properties + * to override those in the T-Engine. + * + * @param sourceMimetype of the node. + * @return {@code true} if there will be override extract properties. + */ + boolean match(String sourceMimetype); + + /** + * Returns the extract mapping to be passed to the T-Engine. + * + * @param nodeRef of the node having its metadata extracted. 
+ * @return the mapping of document properties to system properties + */ + Map> getExtractMapping(NodeRef nodeRef); +} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java index 5c38acfebe..119d3b18a1 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java @@ -25,33 +25,33 @@ */ package org.alfresco.repo.content.metadata; -import java.io.IOException; -import java.io.InputStream; +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.namespace.QName; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import javax.mail.Header; import java.io.Serializable; -import java.io.UnsupportedEncodingException; import java.util.Arrays; -import java.util.Date; -import java.util.Enumeration; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; -import javax.mail.Header; -import javax.mail.internet.InternetAddress; -import javax.mail.internet.MimeMessage; -import javax.mail.internet.MimeUtility; -import javax.mail.internet.MimeMessage.RecipientType; - -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; - /** - * @deprecated OOTB extractors are being moved to T-Engines. + * @deprecated OOTB extractors have been moved to T-Engines. * - * Metadata extractor for RFC822 mime emails. + * This class originally provided metadata extraction of RFC822 mimetype emails. 
It will no longer be used for that + * purpose as that work has been offloaded to a T-Engine via the AsynchronousExtractor. It still exists because the + * governance services (RM) AMP overrides it to provide alternate property mappings and to filter out some of + * these properties if the node does not have the "record" or "dod5015record" aspects.

* - * Default configuration: (see RFC822MetadataExtractor.properties) + * We also still have the default configuration file (RFC822MetadataExtracter.properties), which contains the + * default set of properties, which may be manipulated by RM. * *

  *   messageFrom:              --      imap:messageFrom, cm:originator
@@ -65,143 +65,49 @@ import org.alfresco.service.namespace.QName;
  *      Message-ID:            --      imap:messageId
  * 
* - * @author Derek Hulley - * @since 3.2 + * This class now provides an alternative property mapping in the request to the T-Engine. Unlike the previous + * implementation the filtering of properties takes place before rather than after the extraction. This is done in + * this class making the code within the org.alfresco.module.org_alfresco_module_rm.email.RFC822MetadataExtracter + * filterSystemProperties method redundant. + * + * @author adavis */ @Deprecated public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter + implements MetadataExtractorPropertyMappingOverride { + static String RM_URI = "http://www.alfresco.org/model/recordsmanagement/1.0"; + static String DOD_URI = "http://www.alfresco.org/model/dod5015/1.0"; - protected static final String KEY_MESSAGE_FROM = "messageFrom"; - protected static final String KEY_MESSAGE_TO = "messageTo"; - protected static final String KEY_MESSAGE_CC = "messageCc"; - protected static final String KEY_MESSAGE_SUBJECT = "messageSubject"; - protected static final String KEY_MESSAGE_SENT = "messageSent"; - protected static final String KEY_MESSAGE_RECEIVED = "messageReceived"; + static final String RECORD = "record"; + static final String DOD_5015_RECORD = "dod5015record"; - public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_RFC822 }; + static final QName ASPECT_RECORD = QName.createQName(RM_URI, RECORD); + static final QName ASPECT_DOD_5015_RECORD = QName.createQName(DOD_URI, DOD_5015_RECORD); + + private static Log logger = LogFactory.getLog(RFC822MetadataExtracter.class); + + private static final HashSet SUPPORTED_MIMETYPES = + new HashSet<>(Arrays.asList(new String[] { MimetypeMap.MIMETYPE_RFC822 })); public RFC822MetadataExtracter() { - super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES))); + super(SUPPORTED_MIMETYPES); + } + + private NodeService nodeService; + + public void setNodeService(NodeService nodeService) + { + this.nodeService = nodeService; } @Override 
protected Map extractRaw(ContentReader reader) throws Throwable { - Map rawProperties = newRawMap(); - - InputStream is = null; - try - { - is = reader.getContentInputStream(); - MimeMessage mimeMessage = new MimeMessage(null, is); - - if (mimeMessage != null) - { - /** - * Extract RFC822 values that doesn't match to headers and need to be encoded. - * Or those special fields that require some code to extract data - */ - String tmp = InternetAddress.toString(mimeMessage.getFrom()); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties); - - tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO)); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - putRawValue(KEY_MESSAGE_TO, tmp, rawProperties); - - tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC)); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - putRawValue(KEY_MESSAGE_CC, tmp, rawProperties); - - putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties); - - /** - * Received field from RFC 822 - * - * "Received" ":" ; one per relay - * ["from" domain] ; sending host - * ["by" domain] ; receiving host - * ["via" atom] ; physical path - * ("with" atom) ; link/mail protocol - * ["id" msg-id] ; receiver msg id - * ["for" addr-spec] ; initial form - * ";" date-time ; time received - */ - Date rxDate = mimeMessage.getReceivedDate(); - - if(rxDate != null) - { - // The email implementation extracted the received date for us. - putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties); - } - else - { - // the email implementation did not parse the received date for us. 
- String[] rx = mimeMessage.getHeader("received"); - if(rx != null && rx.length > 0) - { - String lastReceived = rx[0]; - lastReceived = MimeUtility.unfold(lastReceived); - int x = lastReceived.lastIndexOf(';'); - if(x > 0) - { - String dateStr = lastReceived.substring(x + 1).trim(); - putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties); - } - } - } - - String[] subj = mimeMessage.getHeader("Subject"); - if (subj != null && subj.length > 0) - { - String decodedSubject = subj[0]; - try - { - decodedSubject = MimeUtility.decodeText(decodedSubject); - } - catch (UnsupportedEncodingException e) - { - logger.warn(e.toString()); - } - putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties); - } - - /* - * Extract values from all header fields, including extension fields "X-" - */ - Set keys = getMapping().keySet(); - @SuppressWarnings("unchecked") - Enumeration
headers = mimeMessage.getAllHeaders(); - while (headers.hasMoreElements()) - { - Header header = (Header) headers.nextElement(); - if (keys.contains(header.getName())) - { - tmp = header.getValue(); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - - putRawValue(header.getName(), tmp, rawProperties); - } - } - } - } - finally - { - if (is != null) - { - try - { - is.close(); - } - catch (IOException e) - { - } - } - } - // Done - return rawProperties; + logger.error("RFC822MetadataExtracter.extractRaw should not have been called, " + + "as the extraction should have taken place in a T-Engine."); + return Collections.emptyMap(); // will result in no updates. } /** @@ -212,4 +118,46 @@ public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter { return super.getMapping(); } + + @Override + public boolean match(String sourceMimetype) + { + // When RM overrides the "extracter.RFC822" bean with its own class 'this' will be a sub class. + return SUPPORTED_MIMETYPES.contains(sourceMimetype) && this.getClass() != RFC822MetadataExtracter.class; + } + + @Override + // Only include system properties depending on RM / DOD aspects on this nodeRef + public Map> getExtractMapping(NodeRef nodeRef) + { + Map> customMapping = getMapping(); + HashMap> mapping = new HashMap<>(customMapping.size()); + + boolean isARecord = nodeService.hasAspect(nodeRef, ASPECT_RECORD); + boolean isADodRecord = nodeService.hasAspect(nodeRef, ASPECT_DOD_5015_RECORD); + + for (Map.Entry> entry : customMapping.entrySet()) + { + Set customSystemProperties = entry.getValue(); + HashSet systemProperties = new HashSet<>(customSystemProperties.size()); + String documentProperty = entry.getKey(); + + for (QName customSystemProperty : customSystemProperties) + { + String uri = customSystemProperty.getNamespaceURI(); + boolean rmProperty = RM_URI.equals(uri); + boolean dodProperty = DOD_URI.equals(uri); + if ((rmProperty && isARecord) || (dodProperty && isADodRecord) || (!rmProperty && 
!dodProperty)) + { + systemProperties.add(customSystemProperty.toString()); + } + } + if (!systemProperties.isEmpty()) + { + mapping.put(documentProperty, systemProperties); + } + } + + return mapping; + } } diff --git a/repository/src/main/resources/alfresco/content-services-context.xml b/repository/src/main/resources/alfresco/content-services-context.xml index e9ffb4d638..cea89db3ca 100644 --- a/repository/src/main/resources/alfresco/content-services-context.xml +++ b/repository/src/main/resources/alfresco/content-services-context.xml @@ -299,8 +299,25 @@ + + + + + + + + + + + EEE, d MMM yyyy HH:mm:ss Z + EEE, d MMM yy HH:mm:ss Z + d MMM yyyy HH:mm:ss Z + + + + diff --git a/repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties deleted file mode 100644 index 6c28f6920a..0000000000 --- a/repository/src/main/resources/alfresco/metadata/DWGMetadataExtracter.properties +++ /dev/null @@ -1,12 +0,0 @@ -# -# DWGMetadataExtracter - default mapping -# -# author: Nick Burch - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -description=cm:description diff --git a/repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties deleted file mode 100644 index 7a5496b2ad..0000000000 --- a/repository/src/main/resources/alfresco/metadata/HtmlMetadataExtracter.properties +++ /dev/null @@ -1,12 +0,0 @@ -# -# HtmlMetadataExtracter - default mapping -# -# author: Derek Hulley - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -description=cm:description diff --git a/repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties 
b/repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties deleted file mode 100644 index fd4bc33317..0000000000 --- a/repository/src/main/resources/alfresco/metadata/JodConverterMetadataExtracter.properties +++ /dev/null @@ -1,12 +0,0 @@ -# -# JodConverterMetadataExtracter - default mapping -# -# author: Neil McErlean - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -description=cm:description diff --git a/repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties deleted file mode 100644 index eba36d7d57..0000000000 --- a/repository/src/main/resources/alfresco/metadata/MP3MetadataExtracter.properties +++ /dev/null @@ -1,30 +0,0 @@ -# -# MP3MetadataExtracter - default mapping -# -# author: Derek Hulley - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 -namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 - -# Core mappings -author=cm:author -title=cm:title -description=cm:description -created=cm:created - -# Audio descriptive mappings -xmpDM\:album=audio:album -xmpDM\:artist=audio:artist -xmpDM\:composer=audio:composer -xmpDM\:engineer=audio:engineer -xmpDM\:genre=audio:genre -xmpDM\:trackNumber=audio:trackNumber -xmpDM\:releaseDate=audio:releaseDate -#xmpDM:logComment - -# Audio specific mappings -xmpDM\:audioSampleRate=audio:sampleRate -xmpDM\:audioSampleType=audio:sampleType -xmpDM\:audioChannelType=audio:channelType -xmpDM\:audioCompressor=audio:compressor diff --git a/repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties deleted file mode 100644 index 514fa1fc05..0000000000 --- a/repository/src/main/resources/alfresco/metadata/MailMetadataExtracter.properties +++ /dev/null @@ -1,14 +0,0 @@ -# -# 
MailMetadataExtracter - default mapping -# -# author: Derek Hulley - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -sentDate=cm:sentdate -originator=cm:originator, cm:author -addressee=cm:addressee -addressees=cm:addressees -subjectLine=cm:subjectline, cm:description \ No newline at end of file diff --git a/repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties deleted file mode 100644 index 912279af45..0000000000 --- a/repository/src/main/resources/alfresco/metadata/OfficeMetadataExtracter.properties +++ /dev/null @@ -1,14 +0,0 @@ -# -# OfficeMetadataExtracter - default mapping -# -# author: Derek Hulley - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -subject=cm:description -createDateTime=cm:created -lastSaveDateTime=cm:modified diff --git a/repository/src/main/resources/alfresco/metadata/OpenDocumentMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/OpenDocumentMetadataExtracter.properties deleted file mode 100644 index a74de9d296..0000000000 --- a/repository/src/main/resources/alfresco/metadata/OpenDocumentMetadataExtracter.properties +++ /dev/null @@ -1,21 +0,0 @@ -# -# OpenDocumentMetadataExtracter - default mapping -# -# author: Derek Hulley - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -creationDate=cm:created -creator=cm:author -date= -description= -generator= -initialCreator= -keyword= -language= -printDate= -printedBy= -subject=cm:description -title=cm:title diff --git a/repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties deleted file mode 100644 index c5a92bd177..0000000000 --- 
a/repository/src/main/resources/alfresco/metadata/PdfBoxMetadataExtracter.properties +++ /dev/null @@ -1,13 +0,0 @@ -# -# PdfBoxMetadataExtracter - default mapping -# -# author: Derek Hulley - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -subject=cm:description -created=cm:created diff --git a/repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties deleted file mode 100644 index 0211e61c8d..0000000000 --- a/repository/src/main/resources/alfresco/metadata/PoiMetadataExtracter.properties +++ /dev/null @@ -1,13 +0,0 @@ -# -# PoiMetadataExtracter - default mapping -# -# author: Neil McErlean - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -description=cm:description -created=cm:created diff --git a/repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties deleted file mode 100644 index 542a71ce8f..0000000000 --- a/repository/src/main/resources/alfresco/metadata/TikaAudioMetadataExtracter.properties +++ /dev/null @@ -1,34 +0,0 @@ -# -# TikaAudioMetadataExtracter - audio mapping -# -# This is used to map from the Tika audio metadata onto your -# content model. 
This will be used for any Audio content -# for which an explicit extractor isn't defined -# -# author: Nick Burch - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 -namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 - -# Core mappings -author=cm:author -title=cm:title -description=cm:description -created=cm:created - -# Audio descriptive mappings -xmpDM\:album=audio:album -xmpDM\:artist=audio:artist -xmpDM\:composer=audio:composer -xmpDM\:engineer=audio:engineer -xmpDM\:genre=audio:genre -xmpDM\:trackNumber=audio:trackNumber -xmpDM\:releaseDate=audio:releaseDate -#xmpDM:logComment - -# Audio specific mappings -xmpDM\:audioSampleRate=audio:sampleRate -xmpDM\:audioSampleType=audio:sampleType -xmpDM\:audioChannelType=audio:channelType -xmpDM\:audioCompressor=audio:compressor diff --git a/repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties deleted file mode 100644 index 6982bb96d9..0000000000 --- a/repository/src/main/resources/alfresco/metadata/TikaAutoMetadataExtracter.properties +++ /dev/null @@ -1,52 +0,0 @@ -# -# TikaAutoMetadataExtracter - default mapping -# -# This is used to map from the Tika and standard namespaces -# onto your content model. This will be used for any -# content for which an explicit extractor isn't defined, -# by using Tika's auto-selection facilities. 
-# -# author: Nick Burch - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 -namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0 -namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0 - -# Mappings -author=cm:author -title=cm:title -description=cm:description -created=cm:created - -geo\:lat=cm:latitude -geo\:long=cm:longitude - -tiff\:ImageWidth=exif:pixelXDimension -tiff\:ImageLength=exif:pixelYDimension -tiff\:Make=exif:manufacturer -tiff\:Model=exif:model -tiff\:Software=exif:software -tiff\:Orientation=exif:orientation -tiff\:XResolution=exif:xResolution -tiff\:YResolution=exif:yResolution -tiff\:ResolutionUnit=exif:resolutionUnit -exif\:Flash=exif:flash -exif\:ExposureTime=exif:exposureTime -exif\:FNumber=exif:fNumber -exif\:FocalLength=exif:focalLength -exif\:IsoSpeedRatings=exif:isoSpeedRatings -exif\:DateTimeOriginal=exif:dateTimeOriginal - -xmpDM\:album=audio:album -xmpDM\:artist=audio:artist -xmpDM\:composer=audio:composer -xmpDM\:engineer=audio:engineer -xmpDM\:genre=audio:genre -xmpDM\:trackNumber=audio:trackNumber -xmpDM\:releaseDate=audio:releaseDate -#xmpDM:logComment -xmpDM\:audioSampleRate=audio:sampleRate -xmpDM\:audioSampleType=audio:sampleType -xmpDM\:audioChannelType=audio:channelType -xmpDM\:audioCompressor=audio:compressor diff --git a/repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties b/repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties deleted file mode 100644 index 5a323c946f..0000000000 --- a/repository/src/main/resources/alfresco/metadata/TikaSpringConfiguredMetadataExtracter.properties +++ /dev/null @@ -1,20 +0,0 @@ -# -# TikaSpringConfiguredMetadataExtracter.properties - default mapping -# -# This is used to map from the Tika and standard namespaces -# onto your content model. This is used for custom tika parsers, -# but one file is used across all custom parsers. 
-# -# author: Nick Burch - -# Namespaces -namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 - -# Mappings -author=cm:author -title=cm:title -description=cm:description -created=cm:created - -geo\:lat=cm:latitude -geo\:long=cm:longitude diff --git a/repository/src/test/java/org/alfresco/MiscContextTestSuite.java b/repository/src/test/java/org/alfresco/MiscContextTestSuite.java index 02b7723873..321810b85f 100644 --- a/repository/src/test/java/org/alfresco/MiscContextTestSuite.java +++ b/repository/src/test/java/org/alfresco/MiscContextTestSuite.java @@ -72,7 +72,8 @@ import org.springframework.context.ApplicationContext; org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest.class, org.alfresco.repo.content.transform.ArchiveContentTransformerTest.class, - // Metadata tests - replaced with simplified test in LocalRenditionTest and ServiceRenditionTest + // Metadata tests - replaced with simplified tests in LocalRenditionTest and ServiceRenditionTest + org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest.class, org.alfresco.repo.content.metadata.MappingMetadataExtracterTest.class, // ---------------------------------------------------------------------- diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java new file mode 100644 index 0000000000..8c448a69b4 --- /dev/null +++ b/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java @@ -0,0 +1,159 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. + * #L% + */ +package org.alfresco.repo.content.metadata; + +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.namespace.QName; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.Serializable; +import java.util.Collections; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.StringJoiner; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_DOD_5015_RECORD; +import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.ASPECT_RECORD; +import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.DOD_URI; +import static org.alfresco.repo.content.metadata.RFC822MetadataExtracter.RM_URI; +import static org.alfresco.service.namespace.NamespaceService.CONTENT_MODEL_1_0_URI; +import static org.mockito.Mockito.when; + +/** + * Test the ability of RFC822MetadataExtracter when overridden by RM, to control which properties are extracted + * from T-Engines. RFC822MetadataExtracter no longer extracts.
+ * + * @author adavis + */ +//@RunWith(MockitoJUnitRunner.class) +public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest +{ + private RFC822MetadataExtracter extracter; + private RFC822MetadataExtracter rmExtracter; + @Mock private NodeService mockNodeService; + + private NodeRef nodeRefWithDodRecord = new NodeRef("workspace://spacesStore/test-dod"); + private NodeRef nodeRefWithRecord = new NodeRef("workspace://spacesStore/test-rm"); + private NodeRef nodeRefWithBoth = new NodeRef("workspace://spacesStore/test-both"); + private NodeRef nodeRefWithNeither = new NodeRef("workspace://spacesStore/test-neither"); + + private static final QName MESSAGE_FROM_TEST_PROPERTY = + QName.createQName("MessageToTest"); + private static final QName MESSAGE_TO_TEST_PROPERTY = + QName.createQName("MessageFromTest"); + private static final QName MESSAGE_CC_TEST_PROPERTY = + QName.createQName("MessageCCTest"); + + @Override + public void setUp() throws Exception + { + super.setUp(); + + extracter = (RFC822MetadataExtracter) ctx.getBean("extracter.RFC822"); + + MockitoAnnotations.initMocks(this); + when(mockNodeService.hasAspect(nodeRefWithDodRecord, ASPECT_DOD_5015_RECORD)).thenReturn(true); + when(mockNodeService.hasAspect(nodeRefWithRecord, ASPECT_RECORD)).thenReturn(true); + when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_DOD_5015_RECORD)).thenReturn(true); + when(mockNodeService.hasAspect(nodeRefWithBoth, ASPECT_RECORD)).thenReturn(true); + + rmExtracter = new RFC822MetadataExtracter() + { + @Override + // Needed so the init method runs. 
+ protected Map> getDefaultMapping() + { + return Collections.emptyMap(); + } + }; + rmExtracter.setNodeService(mockNodeService); + rmExtracter.init(); + } + + @Override + protected MetadataExtracter getExtracter() + { + return extracter; + } + + @Override + protected void testFileSpecificMetadata(String mimetype, Map properties) + { + // ignore as this is no longer an extractor + } + + public void testMatch() + { + assertFalse("Normal class should never match", extracter.match(MimetypeMap.MIMETYPE_RFC822)); + assertTrue("RM class should match with correct type", rmExtracter.match(MimetypeMap.MIMETYPE_RFC822)); + assertFalse("RM class should not match with other types", rmExtracter.match(MimetypeMap.MIMETYPE_PDF)); + } + + public void testGetExtractMapping() + { + Properties properties = new Properties(); + properties.put("namespace.prefix.rm", RM_URI); + properties.put("namespace.prefix.dod", DOD_URI); + properties.put("namespace.prefix.cm", CONTENT_MODEL_1_0_URI); + properties.put("a", "cm:a"); + properties.put("b", "rm:b, dod:b"); + properties.put("c", "rm:c"); + properties.put("d", "cm:d, rm:d1, rm:d2"); + rmExtracter.setMappingProperties(properties); + + assertEquals("No properties should have been removed", 7, countSystemProperties(nodeRefWithBoth)); + assertEquals("The 1 dod and 4 record properties should have been removed", 2, countSystemProperties(nodeRefWithNeither)); + assertEquals("The 4 record properties should have been removed", 3, countSystemProperties(nodeRefWithDodRecord)); + assertEquals("The 1 dod property should have been removed", 6, countSystemProperties(nodeRefWithRecord)); + + // Check that we have the fully qualified version as the T-Engine knows nothing about the repo's prefixes. + // Check just one of them.
+ assertEquals("{http://www.alfresco.org/model/content/1.0}d, " + + "{http://www.alfresco.org/model/content/1.0}a, " + + "{http://www.alfresco.org/model/dod5015/1.0}b", getSystemProperties(nodeRefWithDodRecord)); + } + + private int countSystemProperties(NodeRef nodeRef) + { + Map> extractMapping = rmExtracter.getExtractMapping(nodeRef); + AtomicInteger count = new AtomicInteger(); + extractMapping.forEach((k,v) -> count.addAndGet(v.size())); + return count.get(); + } + + private String getSystemProperties(NodeRef nodeRef) + { + Map> extractMapping = rmExtracter.getExtractMapping(nodeRef); + StringJoiner sj = new StringJoiner(", "); + extractMapping.forEach((k,v) -> v.forEach(p -> sj.add(p.toString()))); + return sj.toString(); + } +}