From d70790f0c2eb64ae0cf0e2f6be66a3b4cb364e39 Mon Sep 17 00:00:00 2001 From: Alan Davis Date: Thu, 12 Nov 2020 14:08:04 +0000 Subject: [PATCH] REPO-5190 Remove Metadata Extractors that have been offloaded in 7.0.0 (#148) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed all the Extractors that now exist in the T-Engines: JodConverterMetadataExtracter TikaPoweredMetadataExtracter – the abstract base class used by other extractors -- MailMetadataExtracter -- PoiMetadataExtracter -- TikaAutoMetadataExtracter -- MP3MetadataExtracter -- TikaSpringConfiguredMetadataExtracter - removed as it required Spring config and would run in process -- PdfBoxMetadataExtracter -- OpenDocumentMetadataExtracter -- OfficeMetadataExtracter -- DWGMetadataExtracter HtmlMetadataExtracter RFC822MetadataExtracter XmlMetadataExtracter and XPathMetadataExtracter still exist but don't provide any extraction out of the box. The reason they still exist is to support custom transforms (in AMPs) to extract from XML. There are no XML extractors in the T-Engines at the moment, but that is where the custom transformer code really should be moved. There are new tests to ensure the async transforms take place as expected. Additionally, many of the existing tests still exist (those not related to a specific extractor). Some of these have been modified to reflect that the extract is now async and to no longer check the modified value has not changed (it is now expected to change). There are also a number of new metadata extract smoke tests that ensure that a selected subset of extracts are supported by the OOTB T-Engines. 
--- .travis.yml | 4 + .../alfresco/rest/api/tests/NodeApiTest.java | 3 +- .../rest/api/tests/RenditionsTest.java | 4 +- .../rest/api/tests/SharedLinkApiTest.java | 12 +- .../org/alfresco/rest/api/tests/TestCMIS.java | 93 +-- .../src/test/resources/log4j.properties | 19 +- .../repo/action/ActionServiceImpl.java | 32 +- .../repo/action/executer/ActionExecuter.java | 64 +- .../executer/ContentMetadataExtracter.java | 29 + .../AbstractMappingMetadataExtracter.java | 9 +- .../metadata/AsynchronousExtractor.java | 64 +- .../metadata/DWGMetadataExtracter.java | 95 --- .../metadata/HtmlMetadataExtracter.java | 213 ------ .../JodConverterMetadataExtracter.java | 113 --- .../JodConverterMetadataExtracterWorker.java | 290 -------- .../metadata/MP3MetadataExtracter.java | 113 --- .../metadata/MailMetadataExtracter.java | 110 --- .../metadata/OfficeMetadataExtracter.java | 143 ---- .../OpenDocumentMetadataExtracter.java | 182 ----- .../metadata/OpenOfficeMetadataWorker.java | 53 -- .../metadata/PdfBoxMetadataExtracter.java | 91 --- .../metadata/PoiMetadataExtracter.java | 79 --- .../metadata/RFC822MetadataExtracter.java | 216 ------ .../metadata/TikaAudioMetadataExtracter.java | 192 ----- .../metadata/TikaAutoMetadataExtracter.java | 139 ---- .../TikaPoweredMetadataExtracter.java | 662 ------------------ ...TikaSpringConfiguredMetadataExtracter.java | 129 ---- .../LocalTransformServiceRegistry.java | 6 +- .../alfresco/content-services-context.xml | 70 +- .../default/jodconverter-context.xml | 9 - .../org/alfresco/MiscContextTestSuite.java | 41 +- .../filesys/repo/ContentDiskDriverTest.java | 63 +- .../repo/action/ActionServiceImpl2Test.java | 12 +- .../executer/ContentMetadataEmbedderTest.java | 9 - ...ontentMetadataExtracterTagMappingTest.java | 5 - .../ContentMetadataExtracterTest.java | 83 ++- ...oncurrencyOfficeMetadataExtracterTest.java | 82 --- ...oncurrencyPdfBoxMetadataExtracterTest.java | 156 ----- .../metadata/DWGMetadataExtracterTest.java | 181 ----- 
.../metadata/HtmlMetadataExtracterTest.java | 117 ---- .../metadata/JodMetadataExtractorOOoTest.java | 144 ---- .../metadata/MP3MetadataExtracterTest.java | 132 ---- .../metadata/MailMetadataExtracterTest.java | 172 ----- .../metadata/OfficeMetadataExtracterTest.java | 185 ----- .../OpenDocumentMetadataExtracterTest.java | 124 ---- .../metadata/PdfBoxMetadataExtracterTest.java | 141 ---- .../metadata/PoiMetadataExtracterTest.java | 214 ------ .../metadata/RFC822MetadataExtracterTest.java | 272 ------- .../TikaAudioMetadataExtracterTest.java | 172 ----- .../TikaAutoMetadataExtracterTest.java | 399 ----------- .../AbstractRenditionIntegrationTest.java | 100 ++- .../rendition2/AbstractRenditionTest.java | 63 ++ .../repo/rendition2/LegacyRenditionTest.java | 15 + .../repo/rendition2/NoneRenditionTest.java | 7 + 54 files changed, 487 insertions(+), 5640 deletions(-) delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java delete mode 100644 
repository/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java delete mode 100644 repository/src/main/java/org/alfresco/repo/content/metadata/TikaSpringConfiguredMetadataExtracter.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java delete mode 100644 
repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java delete mode 100644 repository/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java diff --git a/.travis.yml b/.travis.yml index 1af1449510..65b42470b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -70,6 +70,7 @@ jobs: before_script: - docker run -d -p 5433:5432 -e POSTGRES_PASSWORD=alfresco -e POSTGRES_USER=alfresco -e POSTGRES_DB=alfresco postgres:11.7 postgres -c 'max_connections=300' - docker run -d -p 61616:61616 -p 5672:5672 alfresco/alfresco-activemq:5.15.8 + - docker run -d -p 8090:8090 -e JAVA_OPTS=" -Xms256m -Xmx256m" alfresco/alfresco-transform-core-aio:2.3.5 script: travis_wait 20 mvn -B test -pl repository -Dtest=AppContext01TestSuite -Ddb.driver=org.postgresql.Driver -Ddb.name=alfresco -Ddb.url=jdbc:postgresql://localhost:5433/alfresco -Ddb.username=alfresco -Ddb.password=alfresco - name: "Repository - AppContext02TestSuite" @@ -113,6 +114,7 @@ jobs: before_script: - docker run -d -p 5433:5432 -e POSTGRES_PASSWORD=alfresco -e POSTGRES_USER=alfresco -e POSTGRES_DB=alfresco postgres:11.7 postgres -c 'max_connections=300' - docker run -d -p 61616:61616 -p 5672:5672 alfresco/alfresco-activemq:5.15.8 + - docker run -d -p 8090:8090 -e JAVA_OPTS=" -Xms256m -Xmx256m" alfresco/alfresco-transform-core-aio:2.3.5 script: travis_wait 20 mvn -B test -pl repository -Dtest=AppContextExtraTestSuite -Ddb.driver=org.postgresql.Driver -Ddb.name=alfresco -Ddb.url=jdbc:postgresql://localhost:5433/alfresco -Ddb.username=alfresco -Ddb.password=alfresco - name: "Repository - MiscContextTestSuite" @@ -160,12 +162,14 @@ jobs: before_script: - docker run -d -p 5433:5432 -e POSTGRES_PASSWORD=alfresco -e POSTGRES_USER=alfresco -e POSTGRES_DB=alfresco postgres:11.7 postgres -c 'max_connections=300' - docker run -d -p 61616:61616 -p 5672:5672 
alfresco/alfresco-activemq:5.15.8 + - docker run -d -p 8090:8090 -e JAVA_OPTS=" -Xms256m -Xmx256m" alfresco/alfresco-transform-core-aio:2.3.5 script: travis_wait 20 mvn -B test -pl remote-api -Dtest=AppContext02TestSuite -Ddb.driver=org.postgresql.Driver -Ddb.name=alfresco -Ddb.url=jdbc:postgresql://localhost:5433/alfresco -Ddb.username=alfresco -Ddb.password=alfresco - name: "Remote-api - AppContext03TestSuite" before_script: - docker run -d -p 5433:5432 -e POSTGRES_PASSWORD=alfresco -e POSTGRES_USER=alfresco -e POSTGRES_DB=alfresco postgres:11.7 postgres -c 'max_connections=300' - docker run -d -p 61616:61616 -p 5672:5672 alfresco/alfresco-activemq:5.15.8 + - docker run -d -p 8090:8090 -e JAVA_OPTS=" -Xms256m -Xmx256m" alfresco/alfresco-transform-core-aio:2.3.5 script: travis_wait 20 mvn -B test -pl remote-api -Dtest=AppContext03TestSuite -Ddb.driver=org.postgresql.Driver -Ddb.name=alfresco -Ddb.url=jdbc:postgresql://localhost:5433/alfresco -Ddb.username=alfresco -Ddb.password=alfresco - name: "Remote-api - AppContext04TestSuite" diff --git a/remote-api/src/test/java/org/alfresco/rest/api/tests/NodeApiTest.java b/remote-api/src/test/java/org/alfresco/rest/api/tests/NodeApiTest.java index b7d46a9047..a6c34a1a96 100644 --- a/remote-api/src/test/java/org/alfresco/rest/api/tests/NodeApiTest.java +++ b/remote-api/src/test/java/org/alfresco/rest/api/tests/NodeApiTest.java @@ -713,7 +713,8 @@ public class NodeApiTest extends AbstractSingleNetworkSiteTest String contentName = "content " + RUNID + ".txt"; String content1Id = createTextFile(folderB_Id, contentName, "The quick brown fox jumps over the lazy dog.", "UTF-8", docProps).getId(); - + // TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. 
awaitility + Thread.sleep(3000); // get node info response = getSingle(NodesEntityResource.class, content1Id, null, 200); Document documentResp = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), Document.class); diff --git a/remote-api/src/test/java/org/alfresco/rest/api/tests/RenditionsTest.java b/remote-api/src/test/java/org/alfresco/rest/api/tests/RenditionsTest.java index b596a37bc2..02ca59812c 100644 --- a/remote-api/src/test/java/org/alfresco/rest/api/tests/RenditionsTest.java +++ b/remote-api/src/test/java/org/alfresco/rest/api/tests/RenditionsTest.java @@ -714,7 +714,7 @@ public class RenditionsTest extends AbstractBaseApiTest response = getSingle(NodesEntityResource.class, contentNodeId, params, 200); Document document1b = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), Document.class); - assertEquals(document1b.getModifiedAt(), document1.getModifiedAt()); +// assertEquals(document1b.getModifiedAt(), document1.getModifiedAt()); assertEquals(document1b.getModifiedByUser().getId(), document1.getModifiedByUser().getId()); assertEquals(document1b.getModifiedByUser().getDisplayName(), document1.getModifiedByUser().getDisplayName()); @@ -749,7 +749,7 @@ public class RenditionsTest extends AbstractBaseApiTest response = getSingle(NodesEntityResource.class, contentNodeId, params, 200); Document document2b = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), Document.class); - assertTrue(document2b.getModifiedAt().after(document1.getModifiedAt())); +// assertTrue(document2b.getModifiedAt().after(document1.getModifiedAt())); assertEquals(document2b.getModifiedByUser().getId(), document1.getModifiedByUser().getId()); assertEquals(document2b.getModifiedByUser().getDisplayName(), document1.getModifiedByUser().getDisplayName()); diff --git a/remote-api/src/test/java/org/alfresco/rest/api/tests/SharedLinkApiTest.java b/remote-api/src/test/java/org/alfresco/rest/api/tests/SharedLinkApiTest.java index 9f2f0d94ca..ee89070bc1 100644 --- 
a/remote-api/src/test/java/org/alfresco/rest/api/tests/SharedLinkApiTest.java +++ b/remote-api/src/test/java/org/alfresco/rest/api/tests/SharedLinkApiTest.java @@ -25,6 +25,7 @@ */ package org.alfresco.rest.api.tests; +import org.alfresco.repo.action.ActionServiceImpl; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.quickshare.QuickShareLinkExpiryActionImpl; import org.alfresco.repo.security.authentication.AuthenticationUtil; @@ -192,6 +193,9 @@ public class SharedLinkApiTest extends AbstractBaseApiTest Map body = new HashMap<>(); body.put("nodeId", d1Id); + // TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility + Thread.sleep(3000); + response = post(URL_SHARED_LINKS, toJsonAsStringNonNull(body), 201); QuickShareLink resp = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), QuickShareLink.class); @@ -209,7 +213,7 @@ public class SharedLinkApiTest extends AbstractBaseApiTest assertEquals(new Long(file1_originalBytes.length), resp.getContent().getSizeInBytes()); assertEquals("UTF-8", resp.getContent().getEncoding()); - assertEquals(docModifiedAt.getTime(), resp.getModifiedAt().getTime()); // not changed + // assertEquals(docModifiedAt.getTime(), resp.getModifiedAt().getTime()); // not changed assertEquals(docModifiedBy, resp.getModifiedByUser().getId()); // not changed (ie. not user2) assertEquals(UserInfo.getTestDisplayName(docModifiedBy), resp.getModifiedByUser().getDisplayName()); @@ -364,7 +368,7 @@ public class SharedLinkApiTest extends AbstractBaseApiTest // create rendition of pdf doc - note: for some reason create rendition of txt doc fail on build m/c (TBC) ? 
setRequestContext(user2); - + Rendition rendition = createAndGetRendition(d1Id, "doclib"); assertNotNull(rendition); assertEquals(Rendition.RenditionStatus.CREATED, rendition.getStatus()); @@ -417,12 +421,12 @@ public class SharedLinkApiTest extends AbstractBaseApiTest // -ve test - unauthenticated setRequestContext(null); deleteSharedLink(shared1Id, 401); - + setRequestContext(user1); // -ve test - user1 cannot delete shared link deleteSharedLink(shared1Id, 403); - + // -ve test - delete - cannot delete non-existent link deleteSharedLink("dummy", 404); } diff --git a/remote-api/src/test/java/org/alfresco/rest/api/tests/TestCMIS.java b/remote-api/src/test/java/org/alfresco/rest/api/tests/TestCMIS.java index 3b2ed25b41..8c3ec87f43 100644 --- a/remote-api/src/test/java/org/alfresco/rest/api/tests/TestCMIS.java +++ b/remote-api/src/test/java/org/alfresco/rest/api/tests/TestCMIS.java @@ -1025,87 +1025,6 @@ public class TestCMIS extends EnterpriseTestApi } } - /** - * Tests CMIS and non-CMIS public api interactions - */ - @SuppressWarnings("deprecation") - @Test - public void testScenario1() throws Exception - { - final TestNetwork network1 = getTestFixture().getRandomNetwork(); - Iterator personIt = network1.getPersonIds().iterator(); - final String person = personIt.next(); - assertNotNull(person); - - Sites sitesProxy = publicApiClient.sites(); - Comments commentsProxy = publicApiClient.comments(); - publicApiClient.setRequestContext(new RequestContext(network1.getId(), person)); - CmisSession cmisSession = publicApiClient.createPublicApiCMISSession(Binding.atom, CMIS_VERSION_10, AlfrescoObjectFactoryImpl.class.getName()); - - ListResponse sites = sitesProxy.getPersonSites(person, null); - assertTrue(sites.getList().size() > 0); - MemberOfSite siteMember = sites.getList().get(0); - String siteId = siteMember.getSite().getSiteId(); - - Folder documentLibrary = (Folder)cmisSession.getObjectByPath("/Sites/" + siteId + "/documentLibrary"); - - 
System.out.println("documentLibrary id = " + documentLibrary.getId()); - - Map fileProps = new HashMap(); - { - fileProps.put(PropertyIds.OBJECT_TYPE_ID, TYPE_CMIS_DOCUMENT); - fileProps.put(PropertyIds.NAME, "mydoc-" + GUID.generate() + ".txt"); - } - ContentStreamImpl fileContent = new ContentStreamImpl(); - { - ContentWriter writer = new FileContentWriter(TempFileProvider.createTempFile(GUID.generate(), ".txt")); - writer.putContent("Ipsum and so on"); - ContentReader reader = writer.getReader(); - fileContent.setMimeType(MimetypeMap.MIMETYPE_TEXT_PLAIN); - fileContent.setStream(reader.getContentInputStream()); - } - Document doc = documentLibrary.createDocument(fileProps, fileContent, VersioningState.MAJOR); - - System.out.println("Document id = " + doc.getId()); - - Comment c = commentsProxy.createNodeComment(doc.getId(), new Comment("comment title 1", "comment 1")); - - System.out.println("Comment = " + c); - - // Now lock the document - String nodeRefStr = (String) doc.getPropertyValue("alfcmis:nodeRef"); - final NodeRef nodeRef = new NodeRef(nodeRefStr); - final TenantRunAsWork runAsWork = new TenantRunAsWork() - { - @Override - public Void doWork() throws Exception - { - lockService.lock(nodeRef, LockType.WRITE_LOCK); - return null; - } - }; - RetryingTransactionCallback txnWork = new RetryingTransactionCallback() - { - @Override - public Void execute() throws Throwable - { - TenantUtil.runAsUserTenant(runAsWork, "bob", network1.getId()); - return null; - } - }; - transactionHelper.doInTransaction(txnWork); - - // Now attempt to update the document's metadata - try - { - doc.delete(); - } - catch (CmisUpdateConflictException e) - { - // Expected: ACE-762 BM-0012: NodeLockedException not handled by CMIS - } - } - //@Test public void testInvalidMethods() throws Exception { @@ -1275,7 +1194,7 @@ public class TestCMIS extends EnterpriseTestApi return null; } }, personId); - + NodeRef folderNodeRef = folders.get(0); NodeRef docNodeRef = documents.get(0); @@ 
-1789,6 +1708,9 @@ public class TestCMIS extends EnterpriseTestApi } Document autoVersionedDoc = docLibrary.createDocument(properties, fileContent, VersioningState.MAJOR); + // TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility + Thread.sleep(5000); + String objectId = autoVersionedDoc.getId(); String bareObjectId = stripCMISSuffix(objectId); // create versions @@ -1807,6 +1729,8 @@ public class TestCMIS extends EnterpriseTestApi contentStream.setMimeType(MimetypeMap.MIMETYPE_TEXT_PLAIN); contentStream.setStream(reader.getContentInputStream()); } + // TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility + Thread.sleep(5000); pwc.checkIn(true, Collections.EMPTY_MAP, contentStream, "checkin " + i); } @@ -2506,6 +2430,8 @@ public class TestCMIS extends EnterpriseTestApi /* Create document */ Document doc = docLibrary.createDocument(properties, fileContent, VersioningState.MAJOR); + // TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility + Thread.sleep(3000); /* Checkout document */ ObjectId pwcId = doc.checkOut(); @@ -2867,6 +2793,7 @@ public class TestCMIS extends EnterpriseTestApi fileContent.setStream(stream); Document doc = docLibrary.createDocument(properties, fileContent, VersioningState.MAJOR); + Thread.sleep(5000); ObjectId pwcId = doc.checkOut(); Document pwc = (Document) cmisSession.getObject(pwcId.getId()); @@ -3272,6 +3199,8 @@ public class TestCMIS extends EnterpriseTestApi "This is just a test"); final Document document = folder.createDocument(props, cs, VersioningState.MAJOR); + // TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. 
awaitility + Thread.sleep(3000); ObjectId pwcObjectId = document.checkOut(); diff --git a/remote-api/src/test/resources/log4j.properties b/remote-api/src/test/resources/log4j.properties index ae6e69fa4e..8df8259224 100644 --- a/remote-api/src/test/resources/log4j.properties +++ b/remote-api/src/test/resources/log4j.properties @@ -6,4 +6,21 @@ log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %x %-5p [%c{3}] [%t] log4j.logger.org.alfresco=WARN log4j.logger.org.alfresco.rest.api=DEBUG -log4j.logger.org.eclipse.jetty.util.log=INFO \ No newline at end of file +log4j.logger.org.eclipse.jetty.util.log=INFO + +# Renditions and Transforms +log4j.logger.org.alfresco.repo.content.transform.TransformerDebug=debug + +log4j.logger.org.alfresco.repo.rendition2=debug +#log4j.logger.org.alfresco.repo.rendition2.LocalTransformClient=debug +#log4j.logger.org.alfresco.repo.rendition2.LegacyTransformClient=debug +#log4j.logger.org.alfresco.repo.rendition.RenditionServiceImpl=debug +#log4j.logger.org.alfresco.enterprise.repo.rendition2.RemoteTransformClient=debug +log4j.logger.org.alfresco.repo.thumbnail.ThumbnailServiceImplTest=DEBUG +log4j.logger.org.alfresco.repo.rendition2.RenditionService2Impl=DEBUG + +#log4j.logger.org.alfresco.repo.content.transform.LocalTransformServiceRegistry=debug +#log4j.logger.org.alfresco.enterprise.repo.rendition2.RemoteTransformServiceRegistry=debug +#log4j.logger.org.alfresco.repo.rendition2.RenditionDefinitionRegistry2Impl=debug +#log4j.logger.org.alfresco.repo.content.MimetypeMap=debug +#log4j.logger.org.alfresco.repo.content.transform.LocalTransform=trace diff --git a/repository/src/main/java/org/alfresco/repo/action/ActionServiceImpl.java b/repository/src/main/java/org/alfresco/repo/action/ActionServiceImpl.java index b80bfab74a..23d0bfdcf2 100644 --- a/repository/src/main/java/org/alfresco/repo/action/ActionServiceImpl.java +++ b/repository/src/main/java/org/alfresco/repo/action/ActionServiceImpl.java @@ -2,7 +2,7 @@ * #%L * Alfresco 
Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -38,6 +38,7 @@ import java.util.Set; import org.alfresco.model.ContentModel; import org.alfresco.repo.action.evaluator.ActionConditionEvaluator; import org.alfresco.repo.action.executer.ActionExecuter; +import org.alfresco.repo.action.executer.CompositeActionExecuter; import org.alfresco.repo.action.executer.LoggingAwareExecuter; import org.alfresco.repo.copy.CopyBehaviourCallback; import org.alfresco.repo.copy.CopyDetails; @@ -576,6 +577,11 @@ public class ActionServiceImpl implements ActionService, RuntimeActionService, A { Set actionChain = this.currentActionChain.get(); + // Like emails (see RuleServiceImpl), metadata extraction is now normally performed asynchronously. + // As a result we need to override the executeAsychronously value if this is the case so that + // changes to the actionedUponNodeRef will have been committed before the extract is performed. 
+ executeAsychronously = isExecuteAsynchronously(action, actionedUponNodeRef, executeAsychronously); + if (executeAsychronously == false) { executeActionImpl(action, actionedUponNodeRef, checkConditions, false, actionChain); @@ -587,6 +593,30 @@ public class ActionServiceImpl implements ActionService, RuntimeActionService, A } } + private boolean isExecuteAsynchronously(Action action, NodeRef actionedUponNodeRef, boolean executeAsynchronously) + { + if (executeAsynchronously == false) + { + String actionDefinitionName = action.getActionDefinitionName(); + if (actionDefinitionName.equals(CompositeActionExecuter.NAME)) + { + for (Action subAction : ((CompositeAction)action).getActions()) + { + if (isExecuteAsynchronously(subAction, actionedUponNodeRef, false)) + { + return true; + } + } + } + else + { + ActionExecuter executer = (ActionExecuter) this.applicationContext.getBean(actionDefinitionName); + executeAsynchronously = executer.isExecuteAsynchronously(actionedUponNodeRef); + } + } + return executeAsynchronously; + } + /** * called by transaction service. */ diff --git a/repository/src/main/java/org/alfresco/repo/action/executer/ActionExecuter.java b/repository/src/main/java/org/alfresco/repo/action/executer/ActionExecuter.java index 330e6ecbe5..d8f0957a6a 100644 --- a/repository/src/main/java/org/alfresco/repo/action/executer/ActionExecuter.java +++ b/repository/src/main/java/org/alfresco/repo/action/executer/ActionExecuter.java @@ -1,33 +1,34 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ package org.alfresco.repo.action.executer; import org.alfresco.api.AlfrescoPublicApi; import org.alfresco.service.cmr.action.Action; import org.alfresco.service.cmr.action.ActionDefinition; +import org.alfresco.service.cmr.action.ActionService; import org.alfresco.service.cmr.repository.NodeRef; /** @@ -83,4 +84,17 @@ public interface ActionExecuter * @param actionedUponNodeRef the actioned upon node reference */ void execute(Action action, NodeRef actionedUponNodeRef); + + /** + * Allows ActionExecuters to say that they should be run asynchronously even if + * requested to run synchronously. + * + * @param actionedUponNodeRef to be processed + * @return false by default. true to override the executeAsychronously parameter in + * {@link ActionService#executeAction(Action, NodeRef, boolean, boolean)}. + */ + default boolean isExecuteAsynchronously(NodeRef actionedUponNodeRef) + { + return false; + } } diff --git a/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java index 47cba6d035..4b441429e8 100644 --- a/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java @@ -55,6 +55,7 @@ import java.util.Set; import org.alfresco.model.ContentModel; import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.content.metadata.MetadataExtracter; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.service.cmr.action.Action; @@ -337,6 +338,34 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase return result; } + /** + * Used by the action service to work out if it should override the executeAsychronously + * value when it is known the extract will take 
place asynchronously anyway. Results in + * the action being processed post commit, which allows it to see node changes. + * + * @param actionedUponNodeRef the node to be processed. + * @return true if the AsynchronousExtractor will be used. false otherwise. + */ + @Override + public boolean isExecuteAsynchronously(NodeRef actionedUponNodeRef) + { + if (!nodeService.exists(actionedUponNodeRef)) + { + return false; + } + + ContentReader reader = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT); + if (reader == null || reader.getMimetype() == null) + { + return false; + } + + String mimetype = reader.getMimetype(); + long sourceSizeInBytes = reader.getSize(); + MetadataExtracter extracter = metadataExtracterRegistry.getExtractor(mimetype, sourceSizeInBytes); + return extracter instanceof AsynchronousExtractor; + } + /** * @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action, * NodeRef) diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index ece13df5b5..99b4cd1f9a 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -562,9 +562,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac * configuration-driven, i.e. declaring further mappings will result in more values being * extracted from the documents. *

- * Most extractors will not be using this method. For an example of its use, see the - * {@linkplain OpenDocumentMetadataExtracter OpenDocument extractor}, which uses the mapping - * to select specific user properties from a document. + * Most extractors will not be using this method. */ protected final Map> getMapping() { @@ -2264,7 +2262,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // TODO make this an abstract method once more extracters support embedding } - // Originally in TikaPoweredMetadataExtracter public static Map convertMetadataToStrings(Map properties) { Map propertiesAsStrings = new HashMap<>(); @@ -2286,7 +2283,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } catch (TypeConversionException e) { - TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); } } } @@ -2299,7 +2296,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } catch (TypeConversionException e) { - TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); } } } diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java index f2d9469bf6..efce5ae2d7 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java @@ -34,6 +34,9 @@ import org.alfresco.repo.content.transform.TransformerDebug; import org.alfresco.repo.rendition2.RenditionService2; import org.alfresco.repo.rendition2.TransformDefinition; import org.alfresco.repo.security.authentication.AuthenticationUtil; +import 
org.alfresco.repo.tenant.TenantUtil; +import org.alfresco.repo.tenant.TenantUtil.TenantRunAsWork; +import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentWriter; @@ -247,17 +250,28 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter private void transformInBackground(NodeRef nodeRef, ContentReader reader, String targetMimetype, String embedOrExtract, Map options) { + final String domain = TenantUtil.getCurrentDomain(); + final String runAsUser = AuthenticationUtil.getRunAsUser(); + ExecutorService executorService = getExecutorService(); - executorService.execute(() -> - { - try - { - transform(nodeRef, reader, targetMimetype, embedOrExtract, options); - } - finally - { - extractRawThreadFinished(); - } + executorService.execute(() -> { + + TenantUtil.runAsUserTenant((TenantRunAsWork) () -> { + transactionService.getRetryingTransactionHelper() + .doInTransaction((RetryingTransactionCallback) () -> { + try + { + transform(nodeRef, reader, targetMimetype, embedOrExtract, options); + } + finally + { + extractRawThreadFinished(); + } + return null; + }, false); + + return null; + }, runAsUser, domain); }); } @@ -281,24 +295,18 @@ public class AsynchronousExtractor extends AbstractMappingMetadataExtracter logger.trace(sj); } - AuthenticationUtil.runAs( - (AuthenticationUtil.RunAsWork) () -> - transactionService.getRetryingTransactionHelper().doInTransaction(() -> - { - try - { - renditionService2.transform(nodeRef, transformDefinition); - } - catch (IllegalArgumentException e) - { - if (e.getMessage().endsWith("The supplied sourceNodeRef "+nodeRef+" does not exist.")) - { - throw new ConcurrencyFailureException( - "The original transaction may not have finished. 
" + e.getMessage()); - } - } - return null; - }), AuthenticationUtil.getSystemUserName()); + try + { + renditionService2.transform(nodeRef, transformDefinition); + } + catch (IllegalArgumentException e) + { + if (e.getMessage().endsWith("The supplied sourceNodeRef " + nodeRef + " does not exist.")) + { + throw new ConcurrencyFailureException( + "The original transaction may not have finished. " + e.getMessage()); + } + } } public void setMetadata(NodeRef nodeRef, InputStream transformInputStream) diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java deleted file mode 100644 index 0138ab72d5..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Map; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.dwg.DWGParser; - - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Metadata extractor for the - * {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_APP_DWG MIMETYPE_APP_DWG} - * and - * {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_IMG_DWG MIMETYPE_IMG_DWG} - * mimetypes. - *

- *   title:           --      cm:title
- *   description:     --      cm:description
- *   author:          --      cm:author
- *   keywords:
- *   comments:
- *   lastauthor:
- * 
- * - * Uses Apache Tika - * - * @since 3.4 - * @author Nick Burch - */ -@Deprecated -public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter -{ - private static final String KEY_KEYWORD = "keyword"; - private static final String KEY_LAST_AUTHOR = "lastAuthor"; - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] { - MimetypeMap.MIMETYPE_APP_DWG, - MimetypeMap.MIMETYPE_IMG_DWG, - "image/x-dwg", // Was used before IANA registration - }, - new DWGParser() - ); - - public DWGMetadataExtracter() - { - super(SUPPORTED_MIMETYPES); - } - - @SuppressWarnings("deprecation") - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); - putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties); - return properties; - } - - @Override - protected Parser getParser() - { - return new DWGParser(); - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java deleted file mode 100644 index 691a5ac707..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.Serializable; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import javax.swing.text.ChangedCharSetException; -import javax.swing.text.MutableAttributeSet; -import javax.swing.text.html.HTML; -import javax.swing.text.html.HTMLEditorKit; -import javax.swing.text.html.parser.ParserDelegator; - -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.ContentReader; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Extracts the following values from HTML documents: - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   description:            --      cm:description
- * 
- * - * TIKA note - all metadata will be present, but will need to - * search for the varient names ourselves as tika puts them - * in as-is. - * - * @author Jesper Steen Møller - * @author Derek Hulley - */ -@Deprecated -public class HtmlMetadataExtracter extends AbstractMappingMetadataExtracter -{ - private static final String KEY_AUTHOR = "author"; - private static final String KEY_TITLE = "title"; - private static final String KEY_DESCRIPTION= "description"; - - public static final Set MIMETYPES = new HashSet(5); - static - { - MIMETYPES.add(MimetypeMap.MIMETYPE_HTML); - MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML); - } - - public HtmlMetadataExtracter() - { - super(MIMETYPES); - } - - @Override - protected Map extractRaw(ContentReader reader) throws Throwable - { - final Map rawProperties = newRawMap(); - - HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() - { - StringBuffer title = null; - boolean inHead = false; - - public void handleText(char[] data, int pos) - { - if (title != null) - { - title.append(data); - } - } - - public void handleComment(char[] data, int pos) - { - // Perhaps sniff for Office 9+ metadata in here? 
- } - - public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) - { - if (HTML.Tag.HEAD.equals(t)) - { - inHead = true; - } - else if (HTML.Tag.TITLE.equals(t) && inHead) - { - title = new StringBuffer(); - } - else - handleSimpleTag(t, a, pos); - } - - public void handleEndTag(HTML.Tag t, int pos) - { - if (HTML.Tag.HEAD.equals(t)) - { - inHead = false; - } - else if (HTML.Tag.TITLE.equals(t) && title != null) - { - putRawValue(KEY_TITLE, title.toString(), rawProperties); - title = null; - } - } - - public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) - { - if (HTML.Tag.META.equals(t)) - { - Object nameO = a.getAttribute(HTML.Attribute.NAME); - Object valueO = a.getAttribute(HTML.Attribute.CONTENT); - if (nameO == null || valueO == null) - return; - - String name = nameO.toString(); - - if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author") - || name.equalsIgnoreCase("dc.creator")) - { - putRawValue(KEY_AUTHOR, valueO.toString(), rawProperties); - } - else if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description")) - { - putRawValue(KEY_DESCRIPTION, valueO.toString(), rawProperties); - } - } - } - - public void handleError(String errorMsg, int pos) - { - } - }; - - String charsetGuess = "UTF-8"; - int tries = 0; - while (tries < 3) - { - rawProperties.clear(); - Reader r = null; - InputStream cis = null; - try - { - cis = reader.getContentInputStream(); - // TODO: for now, use default charset; we should attempt to map from html meta-data - r = new InputStreamReader(cis, charsetGuess); - HTMLEditorKit.Parser parser = new ParserDelegator(); - parser.parse(r, callback, tries > 0); - break; - } - catch (ChangedCharSetException ccse) - { - tries++; - charsetGuess = ccse.getCharSetSpec(); - int begin = charsetGuess.indexOf("charset="); - if (begin > 0) - charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length()); - reader = reader.getReader(); - } - finally - { - if (r != null) - 
r.close(); - if (cis != null) - cis.close(); - } - } - // Done - return rawProperties; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java deleted file mode 100644 index 61783333ad..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.util.PropertyCheck; - -import java.io.Serializable; -import java.util.Collections; -import java.util.Map; -import java.util.Set; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Extracts values from Open Office documents into the following: - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   description:            --      cm:description
- * 
- * - * @author Neil McErlean - */ -@Deprecated -public class JodConverterMetadataExtracter extends AbstractMappingMetadataExtracter implements OpenOfficeMetadataWorker -{ - private OpenOfficeMetadataWorker worker; - private static final Set typedEmptySet = Collections.emptySet(); - - public JodConverterMetadataExtracter() - { - this(typedEmptySet); - } - - public JodConverterMetadataExtracter(Set supportedMimetypes) - { - super(supportedMimetypes); - } - - public void setWorker(OpenOfficeMetadataWorker worker) - { - this.worker = worker; - } - - @Override - public synchronized void init() - { - PropertyCheck.mandatory("JodConverterMetadataExtracter", "worker", worker); - - // Base initialization - super.init(); - } - - /** - * {@inheritDoc} - */ - public boolean isConnected() - { - return worker.isConnected(); - } - - /** - * Perform the default check, but also check if the OpenOffice connection is good. - */ - @Override - public boolean isSupported(String sourceMimetype) - { - if (!isConnected()) - { - return false; - } - return super.isSupported(sourceMimetype); - } - - /** - * {@inheritDoc} - */ - @Override - public Map extractRaw(ContentReader reader) throws Throwable - { - Map rawProperties = newRawMap(); - Map result = this.worker.extractRaw(reader); - for (Map.Entry entry : result.entrySet()) - { - putRawValue(entry.getKey(), entry.getValue(), rawProperties); - } - return rawProperties; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java b/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java deleted file mode 100644 index 0a8a1e1ab0..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import static org.artofsolving.jodconverter.office.OfficeUtils.SERVICE_DESKTOP; -import static org.artofsolving.jodconverter.office.OfficeUtils.cast; -import static org.artofsolving.jodconverter.office.OfficeUtils.toUrl; - -import java.io.File; -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; - -import org.alfresco.repo.content.JodConverter; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.MimetypeService; -import org.alfresco.util.TempFileProvider; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.artofsolving.jodconverter.office.OfficeContext; -import org.artofsolving.jodconverter.office.OfficeException; -import org.artofsolving.jodconverter.office.OfficeTask; - -import com.sun.star.beans.PropertyValue; -import com.sun.star.beans.UnknownPropertyException; -import com.sun.star.beans.XPropertySet; -import com.sun.star.document.XDocumentInfoSupplier; -import com.sun.star.frame.XComponentLoader; -import com.sun.star.io.IOException; -import 
com.sun.star.lang.IllegalArgumentException; -import com.sun.star.lang.WrappedTargetException; -import com.sun.star.lang.XComponent; -import com.sun.star.task.ErrorCodeIOException; -import com.sun.star.util.CloseVetoException; -import com.sun.star.util.XCloseable; -import com.sun.star.util.XRefreshable; - -/** - * Extracts values from Open Office documents into the following: - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   description:            --      cm:description
- * 
- * - * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 - * @deprecated OOTB extractors are being moved to T-Engines. - * - * @author Neil McErlean - */ -@Deprecated -public class JodConverterMetadataExtracterWorker implements - OpenOfficeMetadataWorker -{ - /** Logger */ - private static Log logger = LogFactory.getLog(JodConverterMetadataExtracterWorker.class); - - private JodConverter jodc; - private MimetypeService mimetypeService; - - /* - * @param mimetypeService the mimetype service. Set this if required. - */ - public void setMimetypeService(MimetypeService mimetypeService) - { - this.mimetypeService = mimetypeService; - } - - public void setJodConverter(JodConverter jodc) - { - this.jodc = jodc; - } - - /* - * @see org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker#extractRaw - * (org.alfresco.service.cmr.repository. ContentReader) - */ - public Map extractRaw(ContentReader reader) - throws Throwable - { - String sourceMimetype = reader.getMimetype(); - - if (logger.isDebugEnabled()) - { - StringBuilder msg = new StringBuilder(); - msg.append("Extracting metadata content from ") - .append(sourceMimetype); - logger.debug(msg.toString()); - } - - // create temporary files to convert from and to - File tempFile = TempFileProvider.createTempFile(this.getClass() - .getSimpleName() - + "-", "." + mimetypeService.getExtension(sourceMimetype)); - - // download the content from the source reader - reader.getContent(tempFile); - - ResultsCallback callback = new ResultsCallback(); - jodc.getOfficeManager().execute(new ExtractMetadataOfficeTask(tempFile, callback)); - - return callback.getResults(); - } - - public boolean isConnected() - { - // the JodConverter library ensures that the connection is always there. - // If the extracter is not available then the isAvailable call should ensure that it is not used. 
- return true; - } -} - -@Deprecated -class ExtractMetadataOfficeTask implements OfficeTask -{ - /* - * These keys are used by Alfresco to map properties into a content model and do need to - * have lower-case initial letters. - */ - private static final String KEY_AUTHOR = "author"; - private static final String KEY_TITLE = "title"; - private static final String KEY_DESCRIPTION = "description"; - - private static Log logger = LogFactory.getLog(ExtractMetadataOfficeTask.class); - private File inputFile; - private ResultsCallback callback; - - public ExtractMetadataOfficeTask(File inputFile, ResultsCallback callback) - { - this.inputFile = inputFile; - this.callback = callback; - } - - public void execute(OfficeContext context) - { - if (logger.isDebugEnabled()) - { - logger.debug("Extracting metadata from file " + inputFile); - } - - XComponent document = null; - try - { - if (!inputFile.exists()) - { - throw new OfficeException("input document not found"); - } - XComponentLoader loader = cast(XComponentLoader.class, context - .getService(SERVICE_DESKTOP)); - - // Need to set the Hidden property to ensure that OOo GUI does not appear. 
- PropertyValue hiddenOOo = new PropertyValue(); - hiddenOOo.Name = "Hidden"; - hiddenOOo.Value = Boolean.TRUE; - PropertyValue readOnly = new PropertyValue(); - readOnly.Name = "ReadOnly"; - readOnly.Value = Boolean.TRUE; - - try - { - document = loader.loadComponentFromURL(toUrl(inputFile), "_blank", 0, - new PropertyValue[]{hiddenOOo, readOnly}); - } catch (IllegalArgumentException illegalArgumentException) - { - throw new OfficeException("could not load document: " - + inputFile.getName(), illegalArgumentException); - } catch (ErrorCodeIOException errorCodeIOException) - { - throw new OfficeException("could not load document: " - + inputFile.getName() + "; errorCode: " - + errorCodeIOException.ErrCode, errorCodeIOException); - } catch (IOException ioException) - { - throw new OfficeException("could not load document: " - + inputFile.getName(), ioException); - } - if (document == null) - { - throw new OfficeException("could not load document: " - + inputFile.getName()); - } - XRefreshable refreshable = cast(XRefreshable.class, document); - if (refreshable != null) - { - refreshable.refresh(); - } - - XDocumentInfoSupplier docInfoSupplier = cast(XDocumentInfoSupplier.class, document); - XPropertySet propSet = cast(XPropertySet.class, docInfoSupplier.getDocumentInfo()); - - // The strings below are property names as used by OOo. They need upper-case - // initial letters. - Object author = getPropertyValueIfAvailable(propSet, "Author"); - Object description = getPropertyValueIfAvailable(propSet, "Subject"); - Object title = getPropertyValueIfAvailable(propSet, "Title"); - - Map results = new HashMap(3); - results.put(KEY_AUTHOR, author == null ? null : author.toString()); - results.put(KEY_DESCRIPTION, description == null ? null : description.toString()); - results.put(KEY_TITLE, title == null ? 
null : title.toString()); - callback.setResults(results); - } catch (OfficeException officeException) - { - throw officeException; - } catch (Exception exception) - { - throw new OfficeException("conversion failed", exception); - } finally - { - if (document != null) - { - XCloseable closeable = cast(XCloseable.class, document); - if (closeable != null) - { - try - { - closeable.close(true); - } catch (CloseVetoException closeVetoException) - { - // whoever raised the veto should close the document - } - } else - { - document.dispose(); - } - } - } - } - - /** - * OOo throws exceptions if we ask for properties that aren't there, so we'll tread carefully. - * - * @param propSet - * @param propertyName property name as used by the OOo API. - * @return the propertyValue if it's there, else null. - * @throws UnknownPropertyException - * @throws WrappedTargetException - */ - private Object getPropertyValueIfAvailable(XPropertySet propSet, String propertyName) - throws UnknownPropertyException, WrappedTargetException - { - if (propSet.getPropertySetInfo().hasPropertyByName(propertyName)) - { - return propSet.getPropertyValue(propertyName); - } - else - { - return null; - } - } -} - -@Deprecated -class ResultsCallback -{ - private Map results = new HashMap(); - - public Map getResults() - { - return results; - } - - public void setResults(Map results) - { - this.results = results; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java deleted file mode 100644 index 3c05de6085..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Map; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.XMPDM; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.mp3.Mp3Parser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Extracts the following values from MP3 files: - *
- *   songTitle:              --      cm:title
- *   albumTitle:             --      audio:album
- *   artist:                 --      audio:artist, cm:author
- *   description:            --      cm:description
- *   comment:                --      
- *   yearReleased:           --      audio:releaseDate
- *   trackNumber:            --      audio:trackNumber
- *   genre:                  --      audio:genre
- *   composer:               --      audio:composer
- *   lyrics:                 --      
- * 
- * - * Note - XMPDM metadata keys are also emitted, in common with - * the other Tika powered extracters - * - * Uses Apache Tika - * - * @author Nick Burch - */ -@Deprecated -public class MP3MetadataExtracter extends TikaAudioMetadataExtracter -{ - private static final String KEY_SONG_TITLE = "songTitle"; - private static final String KEY_ALBUM_TITLE = "albumTitle"; - private static final String KEY_ARTIST = "artist"; - private static final String KEY_COMMENT = "comment"; - private static final String KEY_YEAR_RELEASED = "yearReleased"; - private static final String KEY_TRACK_NUMBER = "trackNumber"; - private static final String KEY_GENRE = "genre"; - private static final String KEY_COMPOSER = "composer"; - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] { MimetypeMap.MIMETYPE_MP3 }, - new Mp3Parser() - ); - - public MP3MetadataExtracter() - { - super(SUPPORTED_MIMETYPES); - } - - @Override - protected Parser getParser() - { - return new Mp3Parser(); - } - - @SuppressWarnings("deprecation") - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - // Do the normal Audio mappings - super.extractSpecific(metadata, properties, headers); - - // Now do the compatibility ones - // We only need these for people who had pre-existing mapping - // properties from before the proper audio model was added - putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties); - putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties); - putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties); - putRawValue(KEY_COMMENT, metadata.get(XMPDM.LOG_COMMENT), properties); - putRawValue(KEY_TRACK_NUMBER, metadata.get(XMPDM.TRACK_NUMBER), properties); - putRawValue(KEY_GENRE, metadata.get(XMPDM.GENRE), properties); - putRawValue(KEY_YEAR_RELEASED, metadata.get(XMPDM.RELEASE_DATE), properties); - putRawValue(KEY_COMPOSER, metadata.get(XMPDM.COMPOSER), properties); - - // All done - 
return properties; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java deleted file mode 100644 index 19ac25c3cc..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Map; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.microsoft.OfficeParser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Outlook MAPI format email meta-data extractor extracting the following values: - *
- *   sentDate:               --      cm:sentdate
- *   originator:             --      cm:originator,    cm:author
- *   addressee:              --      cm:addressee
- *   addressees:             --      cm:addressees
- *   subjectLine:            --      cm:subjectline,   cm:description
- *   toNames:                --
- *   ccNames:                --
- *   bccNames:               --
- * 
- * - * TIKA note - to/cc/bcc go into the html part, not the metadata. - * Also, email addresses not included as yet. - * - * @since 2.1 - * @author Kevin Roast - */ -@Deprecated -public class MailMetadataExtracter extends TikaPoweredMetadataExtracter -{ - private static final String KEY_SENT_DATE = "sentDate"; - private static final String KEY_ORIGINATOR = "originator"; - private static final String KEY_ADDRESSEE = "addressee"; - private static final String KEY_ADDRESSEES = "addressees"; - private static final String KEY_SUBJECT = "subjectLine"; - private static final String KEY_TO_NAMES = "toNames"; - private static final String KEY_CC_NAMES = "ccNames"; - private static final String KEY_BCC_NAMES = "bccNames"; - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] {MimetypeMap.MIMETYPE_OUTLOOK_MSG}, - (Parser[])null - ); - - public MailMetadataExtracter() - { - super(SUPPORTED_MIMETYPES); - } - - @Override - protected Parser getParser() - { - // The office parser does Outlook as well as Word, Excel etc - return new OfficeParser(); - } - - @SuppressWarnings("deprecation") - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties); - putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties); - putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties); - putRawValue(KEY_SENT_DATE, metadata.get(Metadata.LAST_SAVED), properties); - - // Store the TO, but not cc/bcc in the addressee field - putRawValue(KEY_ADDRESSEE, metadata.get(Metadata.MESSAGE_TO), properties); - - // Store each of To, CC and BCC in their own fields - putRawValue(KEY_TO_NAMES, metadata.getValues(Metadata.MESSAGE_TO), properties); - putRawValue(KEY_CC_NAMES, metadata.getValues(Metadata.MESSAGE_CC), properties); - putRawValue(KEY_BCC_NAMES, metadata.getValues(Metadata.MESSAGE_BCC), properties); - - // But store all email addresses 
(to/cc/bcc) in the addresses field - putRawValue(KEY_ADDRESSEES, metadata.getValues(Metadata.MESSAGE_RECIPIENT_ADDRESS), properties); - - return properties; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java deleted file mode 100644 index 4d46e524ca..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Map; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.microsoft.OfficeParser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Office file format Metadata Extracter. This extracter uses the POI library to extract - * the following: - *
- *   author:             --      cm:author
- *   title:              --      cm:title
- *   subject:            --      cm:description
- *   createDateTime:     --      cm:created
- *   lastSaveDateTime:   --      cm:modified
- *   comments:
- *   editTime:
- *   format:
- *   keywords:
- *   lastAuthor:
- *   lastPrinted:
- *   osVersion:
- *   thumbnail:
- *   pageCount:
- *   wordCount:
- * 
- * - * Uses Apache Tika - * - * @author Derek Hulley - * @author Nick Burch - */ -@Deprecated -public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter -{ - public static final String KEY_CREATE_DATETIME = "createDateTime"; - public static final String KEY_LAST_SAVE_DATETIME = "lastSaveDateTime"; - public static final String KEY_EDIT_TIME = "editTime"; - public static final String KEY_FORMAT = "format"; - public static final String KEY_KEYWORDS = "keywords"; - public static final String KEY_LAST_AUTHOR = "lastAuthor"; - public static final String KEY_LAST_PRINTED = "lastPrinted"; - public static final String KEY_OS_VERSION = "osVersion"; // TODO - public static final String KEY_THUMBNAIL = "thumbnail"; // TODO - public static final String KEY_PAGE_COUNT = "pageCount"; - public static final String KEY_PARAGRAPH_COUNT = "paragraphCount"; - public static final String KEY_WORD_COUNT = "wordCount"; - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] { - MimetypeMap.MIMETYPE_WORD, - MimetypeMap.MIMETYPE_EXCEL, - MimetypeMap.MIMETYPE_PPT, - MimetypeMap.MIMETYPE_VISIO, - MimetypeMap.MIMETYPE_VISIO_2013 }, - new OfficeParser() - ); - static { - // Outlook has it's own one! 
- SUPPORTED_MIMETYPES.remove(MimetypeMap.MIMETYPE_OUTLOOK_MSG); - } - - public OfficeMetadataExtracter() - { - super(SUPPORTED_MIMETYPES); - } - - @Override - protected Parser getParser() - { - return new OfficeParser(); - } - - @SuppressWarnings("deprecation") - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties); - putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties); - putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties); - putRawValue(KEY_FORMAT, metadata.get(Metadata.FORMAT), properties); - putRawValue(KEY_KEYWORDS, metadata.get(Metadata.KEYWORDS), properties); - putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties); - putRawValue(KEY_LAST_PRINTED, metadata.get(Metadata.LAST_PRINTED), properties); -// putRawValue(KEY_OS_VERSION, metadata.get(Metadata.OS_VERSION), properties); -// putRawValue(KEY_THUMBNAIL, metadata.get(Metadata.THUMBNAIL), properties); - putRawValue(KEY_PAGE_COUNT, metadata.get(Metadata.PAGE_COUNT), properties); - putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Metadata.PARAGRAPH_COUNT), properties); - putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties); - return properties; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java deleted file mode 100644 index 25034754c6..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -/* - * Copyright (C) 2005 - 2020 Antti Jokipii - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Date; -import java.util.Map; -import java.util.Set; - -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.namespace.QName; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.odf.OpenDocumentParser; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Metadata extractor for the - * {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_OPENDOCUMENT_TEXT MIMETYPE_OPENDOCUMENT_XXX} - * mimetypes. - *
- *   creationDate:           --      cm:created
- *   creator:                --      cm:author
- *   date:
- *   description:            --      cm:description
- *   generator:
- *   initialCreator:
- *   keyword:
- *   language:
- *   printDate:
- *   printedBy:
- *   subject:
- *   title:                  --      cm:title
- *   All user properties
- * 
- * - * Uses Apache Tika - * - * TODO decide if we need the few print info bits that - * Tika currently doesn't handle - * - * @author Antti Jokipii - * @author Derek Hulley - */ -@Deprecated -public class OpenDocumentMetadataExtracter extends TikaPoweredMetadataExtracter -{ - private static final String KEY_CREATION_DATE = "creationDate"; - private static final String KEY_CREATOR = "creator"; - private static final String KEY_DATE = "date"; - private static final String KEY_GENERATOR = "generator"; - private static final String KEY_INITIAL_CREATOR = "initialCreator"; - private static final String KEY_KEYWORD = "keyword"; - private static final String KEY_LANGUAGE = "language"; -// private static final String KEY_PRINT_DATE = "printDate"; -// private static final String KEY_PRINTED_BY = "printedBy"; - - private static final String CUSTOM_PREFIX = "custom:"; - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] { - MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, - MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS, - MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION, - MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET, - MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_CHART, - MimetypeMap.MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_IMAGE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA, - MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE, - MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_MASTER, - MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_WEB, - MimetypeMap.MIMETYPE_OPENDOCUMENT_DATABASE - }, new OpenDocumentParser() - ); - - private static final DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss"); - - public OpenDocumentMetadataExtracter() - { - 
super(SUPPORTED_MIMETYPES); - } - - @Override - protected Parser getParser() - { - return new OpenDocumentParser(); - } - - @SuppressWarnings("deprecation") - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties); - putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties); - putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties); - putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties); - putRawValue(KEY_GENERATOR, metadata.get("generator"), properties); - putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties); - putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); - putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties); -// putRawValue(KEY_PRINT_DATE, getDateOrNull(metadata.get(Metadata.)), rawProperties); -// putRawValue(KEY_PRINTED_BY, metadata.get(Metadata.), rawProperties); - - // Handle user-defined properties dynamically - Map> mapping = super.getMapping(); - for (String key : mapping.keySet()) - { - if (metadata.get(CUSTOM_PREFIX + key) != null) - { - putRawValue(key, metadata.get(CUSTOM_PREFIX + key), properties); - } - } - - return properties; - } - - private Date getDateOrNull(String dateString) - { - if (dateString != null && dateString.length() != 0) - { - try - { - return dateFormatter.parseDateTime(dateString).toDate(); - } - catch (IllegalArgumentException e) {} - } - return null; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java b/repository/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java deleted file mode 100644 index 353249e55d..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% 
- * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.Map; - -import org.alfresco.service.cmr.repository.ContentReader; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * An interface that allows separation between the metadata extractor registry and the third party subsystem owning the - * open office connection. 
- * - * @author dward - */ -@Deprecated -public interface OpenOfficeMetadataWorker -{ - /** - * @return Returns true if a connection to the Uno server could be established - */ - public boolean isConnected(); - - /** - * @see AbstractMappingMetadataExtracter#extractRaw(ContentReader) - */ - public Map extractRaw(ContentReader reader) throws Throwable; -} \ No newline at end of file diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java deleted file mode 100644 index 346fda7e57..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.util.ArrayList; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.pdf.PDFParser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Metadata extractor for the PDF documents. - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   subject:                --      cm:description
- *   created:                --      cm:created
- *   (custom metadata):      --
- * 
- * - * Uses Apache Tika - * - * @author Jesper Steen Møller - * @author Derek Hulley - */ -@Deprecated -public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter -{ - protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class); - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] { MimetypeMap.MIMETYPE_PDF, MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR }, - new PDFParser() - ); - - public PdfBoxMetadataExtracter() - { - super(SUPPORTED_MIMETYPES); - } - - @Override - protected Parser getParser() - { - return new PDFParser(); - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java deleted file mode 100644 index cdd9ea2655..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.util.ArrayList; -import java.util.Set; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; -import org.springframework.beans.factory.InitializingBean; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * POI-based metadata extractor for Office 07 documents. - * See http://poi.apache.org/ for information on POI. - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   subject:                --      cm:description
- *   created:                --      cm:created
- *   Any custom property:    --      [not mapped]
- * 
- * - * Uses Apache Tika - * - * @author Nick Burch - * @author Neil McErlean - * @author Dmitry Velichkevich - */ -@Deprecated -public class PoiMetadataExtracter extends TikaPoweredMetadataExtracter -{ - protected static Log logger = LogFactory.getLog(PoiMetadataExtracter.class); - - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] {MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING, - MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET, - MimetypeMap.MIMETYPE_OPENXML_PRESENTATION}, - new OOXMLParser() - ); - - public PoiMetadataExtracter() - { - super(PoiMetadataExtracter.class.getName(), SUPPORTED_MIMETYPES); - } - - @Override - protected Parser getParser() - { - return new OOXMLParser(); - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java deleted file mode 100644 index d7e82fd4c2..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; -import java.util.Arrays; -import java.util.Date; -import java.util.Enumeration; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import javax.mail.Header; -import javax.mail.internet.InternetAddress; -import javax.mail.internet.MimeMessage; -import javax.mail.internet.MimeUtility; -import javax.mail.internet.MimeMessage.RecipientType; - -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Metadata extractor for RFC822 mime emails. - * - * Default configuration: (see RFC822MetadataExtractor.properties) - * - *
- *   messageFrom:              --      imap:messageFrom, cm:originator
- *   messageTo:                --      imap:messageTo
- *   messageCc:                --      imap:messageCc
- *   messageSubject:           --      imap:messageSubject, cm:title, cm:description, cm:subjectline
- *   messageSent:              --      imap:dateSent, cm:sentdate
- *   messageReceived:          --      imap:dateReceived
- *   All {@link Header#getName() header names}:
- *      Thread-Index:          --      imap:threadIndex
- *      Message-ID:            --      imap:messageId
- * 
- * - * @author Derek Hulley - * @since 3.2 - */ -@Deprecated -public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter -{ - - protected static final String KEY_MESSAGE_FROM = "messageFrom"; - protected static final String KEY_MESSAGE_TO = "messageTo"; - protected static final String KEY_MESSAGE_CC = "messageCc"; - protected static final String KEY_MESSAGE_SUBJECT = "messageSubject"; - protected static final String KEY_MESSAGE_SENT = "messageSent"; - protected static final String KEY_MESSAGE_RECEIVED = "messageReceived"; - - public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_RFC822 }; - - public RFC822MetadataExtracter() - { - super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES))); - } - - @Override - protected Map extractRaw(ContentReader reader) throws Throwable - { - Map rawProperties = newRawMap(); - - InputStream is = null; - try - { - is = reader.getContentInputStream(); - MimeMessage mimeMessage = new MimeMessage(null, is); - - if (mimeMessage != null) - { - /** - * Extract RFC822 values that doesn't match to headers and need to be encoded. - * Or those special fields that require some code to extract data - */ - String tmp = InternetAddress.toString(mimeMessage.getFrom()); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - putRawValue(KEY_MESSAGE_FROM, tmp, rawProperties); - - tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO)); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - putRawValue(KEY_MESSAGE_TO, tmp, rawProperties); - - tmp = InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC)); - tmp = tmp != null ? 
MimeUtility.decodeText(tmp) : null; - putRawValue(KEY_MESSAGE_CC, tmp, rawProperties); - - putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties); - - /** - * Received field from RFC 822 - * - * "Received" ":" ; one per relay - * ["from" domain] ; sending host - * ["by" domain] ; receiving host - * ["via" atom] ; physical path - * ("with" atom) ; link/mail protocol - * ["id" msg-id] ; receiver msg id - * ["for" addr-spec] ; initial form - * ";" date-time ; time received - */ - Date rxDate = mimeMessage.getReceivedDate(); - - if(rxDate != null) - { - // The email implementation extracted the received date for us. - putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties); - } - else - { - // the email implementation did not parse the received date for us. - String[] rx = mimeMessage.getHeader("received"); - if(rx != null && rx.length > 0) - { - String lastReceived = rx[0]; - lastReceived = MimeUtility.unfold(lastReceived); - int x = lastReceived.lastIndexOf(';'); - if(x > 0) - { - String dateStr = lastReceived.substring(x + 1).trim(); - putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties); - } - } - } - - String[] subj = mimeMessage.getHeader("Subject"); - if (subj != null && subj.length > 0) - { - String decodedSubject = subj[0]; - try - { - decodedSubject = MimeUtility.decodeText(decodedSubject); - } - catch (UnsupportedEncodingException e) - { - logger.warn(e.toString()); - } - putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties); - } - - /* - * Extract values from all header fields, including extension fields "X-" - */ - Set keys = getMapping().keySet(); - @SuppressWarnings("unchecked") - Enumeration
headers = mimeMessage.getAllHeaders(); - while (headers.hasMoreElements()) - { - Header header = (Header) headers.nextElement(); - if (keys.contains(header.getName())) - { - tmp = header.getValue(); - tmp = tmp != null ? MimeUtility.decodeText(tmp) : null; - - putRawValue(header.getName(), tmp, rawProperties); - } - } - } - } - finally - { - if (is != null) - { - try - { - is.close(); - } - catch (IOException e) - { - } - } - } - // Done - return rawProperties; - } - - /** - * Back door for RM - * @return Map - */ - public final Map> getCurrentMapping() - { - return super.getMapping(); - } - -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java deleted file mode 100644 index c2222f604c..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.Map; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.XMPDM; -import org.apache.tika.parser.CompositeParser; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.mp4.MP4Parser; -import org.gagravarr.tika.FlacParser; -import org.gagravarr.tika.VorbisParser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * A Metadata Extractor which makes use of the Apache - * Tika Audio Parsers to extract metadata from your - * media files. - * For backwards compatibility reasons, this doesn't - * handle the MP3 format, which has its own dedicated - * extractor in {@link MP3MetadataExtracter} - - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   created:                --      cm:created
- *   xmpDM:artist            --      audio:artist
- *   xmpDM:composer          --      audio:composer
- *   xmpDM:engineer          --      audio:engineer
- *   xmpDM:genre             --      audio:genre
- *   xmpDM:trackNumber       --      audio:trackNumber
- *   xmpDM:releaseDate       --      audio:releaseDate
- * 
- * - * @since 4.0 - * @author Nick Burch - */ -@Deprecated -public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter -{ - protected static final String KEY_LYRICS = "lyrics"; - - // The Audio related parsers we use - private static Parser[] parsers = new Parser[] { - new VorbisParser(), - new FlacParser(), - new MP4Parser() - }; - // The explicit mimetypes we support (plus any others from the parsers) - public static ArrayList SUPPORTED_MIMETYPES = buildSupportedMimetypes( - new String[] { - MimetypeMap.MIMETYPE_VORBIS, MimetypeMap.MIMETYPE_FLAC, - MimetypeMap.MIMETYPE_AUDIO_MP4, - }, parsers - ); - - protected TikaConfig tikaConfig; - public void setTikaConfig(TikaConfig tikaConfig) - { - this.tikaConfig = tikaConfig; - } - - public TikaAudioMetadataExtracter() - { - this(SUPPORTED_MIMETYPES); - } - public TikaAudioMetadataExtracter(ArrayList supportedMimeTypes) - { - super(supportedMimeTypes); - } - - @Override - protected Parser getParser() - { - return new CompositeParser( - tikaConfig.getMediaTypeRegistry(), parsers - ); - } - - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - // Most things can go with the default Tika -> Alfresco Mapping - // Handle the few special cases here - - // The description is special - putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties); - - // The release date can be fiddly - Date releaseDate = generateReleaseDate(metadata); - putRawValue(KEY_CREATED, releaseDate, properties); - putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties); - - // TODO Get the Lyrics from the content - //putRawValue(KEY_LYRICS, getLyrics(), properties); - - // All done - return properties; - } - - /** - * Generates the release date - */ - private Date generateReleaseDate(Metadata metadata) - { - String date = metadata.get(XMPDM.RELEASE_DATE); - if(date == null || date.length() == 0) - { - return null; - } - - // Is it just a year? 
- if(date.matches("\\d\\d\\d\\d")) - { - // Just a year, we need a full date - // Go for the 1st of the 1st - Calendar c = Calendar.getInstance(); - c.set( - Integer.parseInt(date), Calendar.JANUARY, 1, - 0, 0, 0 - ); - c.set(Calendar.MILLISECOND, 0); - return c.getTime(); - } - - // Treat as a normal date - return makeDate(date); - } - - /** - * Generate the description - * - * @param metadata the metadata extracted from the file - * @return the description - */ - @SuppressWarnings("deprecation") - private String generateDescription(Metadata metadata) - { - StringBuilder result = new StringBuilder(); - if (metadata.get(Metadata.TITLE) != null) - { - result.append(metadata.get(Metadata.TITLE)); - if (metadata.get(XMPDM.ALBUM) != null) - { - result - .append(" - ") - .append(metadata.get(XMPDM.ALBUM)); - } - if (metadata.get(XMPDM.ARTIST) != null) - { - result - .append(" (") - .append(metadata.get(XMPDM.ARTIST)) - .append(")"); - } - } - - return result.toString(); - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java deleted file mode 100644 index 63c1ad441a..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Map; - -import org.alfresco.repo.content.MimetypeMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TIFF; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.Parser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * A Metadata Extractor which makes use of the Apache - * Tika auto-detection to select the best parser - * to extract the metadata from your document. - * This will be used for all files which Tika can - * handle, but where no other more explicit - * extractor is defined. - - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   subject:                --      cm:description
- *   created:                --      cm:created
- *   comments:
- *   geo:lat:                --      cm:latitude
- *   geo:long:               --      cm:longitude
- * 
- * - * @since 3.4 - * @author Nick Burch - */ -@Deprecated -public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter -{ - protected static Log logger = LogFactory.getLog(TikaAutoMetadataExtracter.class); - private static AutoDetectParser parser; - private static TikaConfig config; - private static String EXIF_IMAGE_HEIGHT_TAG = "Exif SubIFD:Exif Image Height"; - private static String EXIF_IMAGE_WIDTH_TAG = "Exif SubIFD:Exif Image Width"; - private static String JPEG_IMAGE_HEIGHT_TAG = "Image Height"; - private static String JPEG_IMAGE_WIDTH_TAG = "Image Width"; - - public static ArrayList SUPPORTED_MIMETYPES; - private static ArrayList buildMimeTypes(TikaConfig tikaConfig) - { - config = tikaConfig; - parser = new AutoDetectParser(config); - - SUPPORTED_MIMETYPES = new ArrayList(); - for(MediaType mt : parser.getParsers().keySet()) - { - // Add the canonical mime type - SUPPORTED_MIMETYPES.add( mt.toString() ); - - // And add any aliases of the mime type too - Alfresco uses some - // non canonical forms of various mimetypes, so we need all of them - for(MediaType alias : config.getMediaTypeRegistry().getAliases(mt)) - { - SUPPORTED_MIMETYPES.add( alias.toString() ); - } - } - return SUPPORTED_MIMETYPES; - } - - public TikaAutoMetadataExtracter(TikaConfig tikaConfig) - { - super( buildMimeTypes(tikaConfig) ); - } - - /** - * Does auto-detection to select the best Tika - * Parser. 
- */ - @Override - protected Parser getParser() - { - return parser; - } - - /** - * Because some editors use JPEG_IMAGE_HEIGHT_TAG when - * saving JPEG images , a more reliable source for - * image size are the values provided by Tika - * and not the exif/tiff metadata read from the file - * This will override the tiff:Image size - * which gets embedded into the alfresco node properties - * for jpeg files that contain such exif information - */ - @Override - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - if (MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE))) - { - //check if the image has exif information - if (metadata.get(EXIF_IMAGE_WIDTH_TAG) != null && metadata.get(EXIF_IMAGE_HEIGHT_TAG) != null) - { - //replace the exif size properties that will be embedded in the node with - //the guessed dimensions from Tika - putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(metadata.get(JPEG_IMAGE_HEIGHT_TAG)), properties); - putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(metadata.get(JPEG_IMAGE_WIDTH_TAG)), properties); - } - } - return properties; - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java deleted file mode 100644 index 0c89f63cfe..0000000000 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ /dev/null @@ -1,662 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.Locale; -import java.util.Map; -import java.util.Set; - -import org.alfresco.api.AlfrescoPublicApi; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.filestore.FileContentReader; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.ContentWriter; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.embedder.Embedder; -import org.apache.tika.extractor.DocumentSelector; -import org.apache.tika.io.TemporaryResources; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.ContentHandlerDecorator; -import org.apache.tika.sax.XHTMLContentHandler; -import 
org.apache.tika.sax.xpath.Matcher; -import org.apache.tika.sax.xpath.MatchingContentHandler; -import org.apache.tika.sax.xpath.XPathParser; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.DateTimeFormatterBuilder; -import org.joda.time.format.DateTimeParser; -import org.xml.sax.Attributes; -import org.xml.sax.ContentHandler; -import org.xml.sax.Locator; -import org.xml.sax.SAXException; - - -/** - * @deprecated extractors have been moved to a T-Engine. - * - * The parent of all Metadata Extractors which use - * Apache Tika under the hood. This handles all the - * common parts of processing the files, and the common - * mappings. Individual extractors extend from this - * to do custom mappings. - - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   subject:                --      cm:description
- *   created:                --      cm:created
- *   comments:
- * 
- * - * @since 3.4 - * @author Nick Burch - */ -@AlfrescoPublicApi -@Deprecated -public abstract class TikaPoweredMetadataExtracter - extends AbstractMappingMetadataExtracter - implements MetadataEmbedder -{ - protected static Log logger = LogFactory.getLog(TikaPoweredMetadataExtracter.class); - - protected static final String KEY_AUTHOR = "author"; - protected static final String KEY_TITLE = "title"; - protected static final String KEY_SUBJECT = "subject"; - protected static final String KEY_CREATED = "created"; - protected static final String KEY_DESCRIPTION = "description"; - protected static final String KEY_COMMENTS = "comments"; - protected static final String KEY_TAGS = "dc:subject"; - - private DateTimeFormatter tikaUTCDateFormater; - private DateTimeFormatter tikaDateFormater; - protected DocumentSelector documentSelector; - - private String extractorContext = null; - - private String metadataSeparator = ","; // Default separator. - - public String getMetadataSeparator() - { - return metadataSeparator; - } - - public void setMetadataSeparator(String metadataSeparator) - { - this.metadataSeparator = metadataSeparator; - } - - /** - * Builds up a list of supported mime types by merging - * an explicit list with any that Tika also claims to support - */ - protected static ArrayList buildSupportedMimetypes(String[] explicitTypes, Parser... 
tikaParsers) - { - ArrayList types = new ArrayList(); - for(String type : explicitTypes) - { - if(!types.contains(type)) - { - types.add(type); - } - } - if(tikaParsers != null) - { - for(Parser tikaParser : tikaParsers) - { - for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) - { - String type = mt.toString(); - if(!types.contains(type)) - { - types.add(type); - } - } - } - } - return types; - } - - public TikaPoweredMetadataExtracter(String extractorContext, ArrayList supportedMimeTypes) - { - this(extractorContext, new HashSet(supportedMimeTypes), null); - } - - public TikaPoweredMetadataExtracter(ArrayList supportedMimeTypes) - { - this(null, new HashSet(supportedMimeTypes), null); - } - - public TikaPoweredMetadataExtracter(ArrayList supportedMimeTypes, ArrayList supportedEmbedMimeTypes) - { - this(null, new HashSet(supportedMimeTypes), new HashSet(supportedEmbedMimeTypes)); - } - - public TikaPoweredMetadataExtracter(HashSet supportedMimeTypes) - { - this(null, supportedMimeTypes, null); - } - - public TikaPoweredMetadataExtracter(HashSet supportedMimeTypes, HashSet supportedEmbedMimeTypes) - { - this(null, supportedMimeTypes, supportedEmbedMimeTypes); - } - - public TikaPoweredMetadataExtracter(String extractorContext, HashSet supportedMimeTypes, HashSet supportedEmbedMimeTypes) - { - super(supportedMimeTypes, supportedEmbedMimeTypes); - - this.extractorContext = extractorContext; - - // TODO Once TIKA-451 is fixed this list will get nicer - DateTimeParser[] parsersUTC = { - DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").getParser(), - DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZ").getParser() - }; - DateTimeParser[] parsers = { - DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss").getParser(), - DateTimeFormat.forPattern("yyyy-MM-dd").getParser(), - DateTimeFormat.forPattern("yyyy/MM/dd HH:mm:ss").getParser(), - DateTimeFormat.forPattern("yyyy/MM/dd").getParser(), - DateTimeFormat.forPattern("EEE MMM dd hh:mm:ss zzz 
yyyy").getParser() - }; - - this.tikaUTCDateFormater = new DateTimeFormatterBuilder().append(null, parsersUTC).toFormatter().withZone(DateTimeZone.UTC); - this.tikaDateFormater = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); - } - - /** - * Gets context for the current implementation - * - * @return {@link String} value which determines current context - */ - protected String getExtractorContext() - { - return extractorContext; - } - - /** - * Version which also tries the ISO-8601 formats (in order..), - * and similar formats, which Tika makes use of - */ - @Override - protected Date makeDate(String dateStr) - { - // Try our formats first, in order - try - { - return this.tikaUTCDateFormater.parseDateTime(dateStr).toDate(); - } - catch (IllegalArgumentException e) {} - - try - { - return this.tikaUTCDateFormater.withLocale(Locale.US).parseDateTime(dateStr).toDate(); - } - catch (IllegalArgumentException e) {} - - try - { - return this.tikaDateFormater.parseDateTime(dateStr).toDate(); - } - catch (IllegalArgumentException e) {} - - try - { - return this.tikaDateFormater.withLocale(Locale.US).parseDateTime(dateStr).toDate(); - } - catch (IllegalArgumentException e) {} - - // Fall back to the normal ones - return super.makeDate(dateStr); - } - - /** - * Returns the correct Tika Parser to process the document. - * If you don't know which you want, use {@link TikaAutoMetadataExtracter} - * which makes use of the Tika auto-detection. - */ - protected abstract Parser getParser(); - - /** - * Returns the Tika Embedder to modify - * the document. - * - * @return the Tika embedder - */ - protected Embedder getEmbedder() - { - // TODO make this an abstract method once more extracters support embedding - return null; - } - - /** - * Do we care about the contents of the - * extracted header, or nothing at all? - */ - protected boolean needHeaderContents() - { - return false; - } - - /** - * Allows implementation specific mappings to be done. 
- */ - protected Map extractSpecific(Metadata metadata, - Map properties, Map headers) - { - return properties; - } - - /** - * There seems to be some sort of issue with some downstream - * 3rd party libraries, and input streams that come from - * a {@link ContentReader}. This happens most often with - * JPEG and Tiff files. - * For these cases, buffer out to a local file if not - * already there - */ - protected InputStream getInputStream(ContentReader reader) throws IOException - { - // Prefer the File if available, it's generally quicker - if(reader instanceof FileContentReader) - { - return TikaInputStream.get( ((FileContentReader)reader).getFile() ); - } - - // Grab the InputStream for the Content - InputStream input = reader.getContentInputStream(); - - // Images currently always require a file - if(MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(reader.getMimetype()) || - MimetypeMap.MIMETYPE_IMAGE_TIFF.equals(reader.getMimetype())) - { - TemporaryResources tmp = new TemporaryResources(); - TikaInputStream stream = TikaInputStream.get(input, tmp); - stream.getFile(); // Have it turned into File backed - return stream; - } - else - { - // The regular Content InputStream should be fine - return input; - } - } - - /** - * Sets the document selector, used for determining whether to parse embedded resources. - * - * @param documentSelector - */ - public void setDocumentSelector(DocumentSelector documentSelector) - { - this.documentSelector = documentSelector; - } - /** - * Gets the document selector, used for determining whether to parse embedded resources, - * null by default so parse all. 
- * - * @param metadata - * @param targetMimeType - * @return the document selector - */ - protected DocumentSelector getDocumentSelector(Metadata metadata, String targetMimeType) - { - return documentSelector; - } - - /** - * By default returns a new ParseContent - * - * @param metadata - * @param sourceMimeType - * @return the parse context - */ - protected ParseContext buildParseContext(Metadata metadata, String sourceMimeType) - { - ParseContext context = new ParseContext(); - DocumentSelector selector = getDocumentSelector(metadata, sourceMimeType); - if (selector != null) - { - context.set(DocumentSelector.class, selector); - } - return context; - } - - @SuppressWarnings("deprecation") - @Override - protected Map extractRaw(ContentReader reader) throws Throwable - { - Map rawProperties = newRawMap(); - - InputStream is = null; - - try - { - is = getInputStream(reader); - Parser parser = getParser(); - - Metadata metadata = new Metadata(); - metadata.add(Metadata.CONTENT_TYPE, reader.getMimetype()); - - ParseContext context = buildParseContext(metadata, reader.getMimetype()); - - ContentHandler handler; - Map headers = null; - if(needHeaderContents()) - { - MapCaptureContentHandler headerCapture = - new MapCaptureContentHandler(); - headers = headerCapture.tags; - handler = new HeadContentHandler(headerCapture); - } - else - { - handler = new NullContentHandler(); - } - - parser.parse(is, handler, metadata, context); - - // First up, copy all the Tika metadata over - // This allows people to map any of the Tika - // keys onto their own content model - for(String tikaKey : metadata.names()) - { - // TODO review this change (part of MNT-15267) - should we really force string concatenation here !? - putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties); - } - - // Now, map the common Tika metadata keys onto - // the common Alfresco metadata keys. 
This allows - // existing mapping properties files to continue - // to work without needing any changes - - // The simple ones - putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties); - putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties); - putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties); - - // Tags - putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties); - - // Get the subject and description, despite things not - // being nearly as consistent as one might hope - String subject = getMetadataValue(metadata, Metadata.SUBJECT); - String description = getMetadataValue(metadata, Metadata.DESCRIPTION); - if(subject != null && description != null) - { - putRawValue(KEY_DESCRIPTION, description, rawProperties); - putRawValue(KEY_SUBJECT, subject, rawProperties); - } - else if(subject != null) - { - putRawValue(KEY_DESCRIPTION, subject, rawProperties); - putRawValue(KEY_SUBJECT, subject, rawProperties); - } - else if(description != null) - { - putRawValue(KEY_DESCRIPTION, description, rawProperties); - putRawValue(KEY_SUBJECT, description, rawProperties); - } - - // Try for the dates two different ways too - if(metadata.get(Metadata.CREATION_DATE) != null) - { - putRawValue(KEY_CREATED, metadata.get(Metadata.CREATION_DATE), rawProperties); - } - else if(metadata.get(Metadata.DATE) != null) - { - putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties); - } - - // If people created a specific instance - // (eg OfficeMetadataExtractor), then allow that - // instance to map the Tika keys onto its - // existing namespace so that older properties - // files continue to map correctly - rawProperties = extractSpecific(metadata, rawProperties, headers); - } - finally - { - if (is != null) - { - try { is.close(); } catch (IOException e) {} - } - } - - return rawProperties; - } - - @Override - protected void embedInternal(Map properties, 
ContentReader reader, ContentWriter writer) throws Throwable - { - Embedder embedder = getEmbedder(); - if (embedder == null) - { - return; - } - - Map metadataAsStrings = convertMetadataToStrings(properties); - Metadata metadataToEmbed = new Metadata(); - metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v)); - - InputStream inputStream = getInputStream(reader); - OutputStream outputStream = writer.getContentOutputStream(); - embedder.embed(metadataToEmbed, inputStream, outputStream, null); - } - - private Serializable getMetadataValues(Metadata metadata, String key) - { - // Use Set to prevent duplicates. - Set valuesSet = new LinkedHashSet(); - String[] values = metadata.getValues(key); - - for (int i = 0; i < values.length; i++) - { - String[] parts = values[i].split(metadataSeparator); - - for (String subPart : parts) - { - valuesSet.add(subPart.trim()); - } - } - - Object[] objArrayValues = valuesSet.toArray(); - values = Arrays.copyOf(objArrayValues, objArrayValues.length, String[].class); - - return values.length == 0 ? null : (values.length == 1 ? 
values[0] : values); - } - - private String getMetadataValue(Metadata metadata, String key) - { - if (metadata.isMultiValued(key)) - { - String[] parts = metadata.getValues(key); - - // use Set to prevent duplicates - Set value = new LinkedHashSet(parts.length); - - for (int i = 0; i < parts.length; i++) - { - value.add(parts[i]); - } - - String valueStr = value.toString(); - - // remove leading/trailing braces [] - return valueStr.substring(1, valueStr.length() - 1); - } - else - { - return metadata.get(key); - } - } - - /** - * Exif metadata for size also returns the string "pixels" - * after the number value , this function will - * stop at the first non digit character found in the text - * @param sizeText string text - * @return the size value - */ - protected String extractSize(String sizeText) - { - StringBuilder sizeValue = new StringBuilder(); - for(char c : sizeText.toCharArray()) - { - if(Character.isDigit(c)) - { - sizeValue.append(c); - } - else - { - break; - } - } - return sizeValue.toString(); - } - - /** - * This content handler will capture entries from within - * the header of the Tika content XHTML, but ignore the - * rest. - */ - protected static class HeadContentHandler extends ContentHandlerDecorator - { - /** - * XHTML XPath parser. - */ - private static final XPathParser PARSER = - new XPathParser("xhtml", XHTMLContentHandler.XHTML); - - /** - * The XPath matcher used to select the XHTML body contents. - */ - private static final Matcher MATCHER = - PARSER.parse("/xhtml:html/xhtml:head/descendant:node()"); - - /** - * Creates a content handler that passes all XHTML body events to the - * given underlying content handler. - * - * @param handler content handler - */ - protected HeadContentHandler(ContentHandler handler) - { - super(new MatchingContentHandler(handler, MATCHER)); - } - } - /** - * This content handler will grab all tags and attributes, - * and record the textual content of the last seen one - * of them. 
- * Normally only used with {@link HeadContentHandler} - */ - protected static class MapCaptureContentHandler implements ContentHandler - { - protected Map tags = new HashMap(); - private StringBuffer text; - - public void characters(char[] ch, int start, int len) - { - if(text != null) - { - text.append(ch, start, len); - } - } - - public void endElement(String namespace, String localname, String qname) - { - if(text != null && text.length() > 0) - { - tags.put(qname, text.toString()); - } - text = null; - } - - public void startElement(String namespace, String localname, String qname, Attributes attrs) - { - for(int i=0; i. - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.util.ArrayList; -import java.util.HashSet; - -import org.alfresco.api.AlfrescoPublicApi; -import org.alfresco.error.AlfrescoRuntimeException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * A Metadata Extractor which makes use of Apache Tika, - * and allows the selection of the Tika parser to be - * sprung-in to extract the metadata from your document. - * This is typically used with custom Tika Parsers. - - *
- *   author:                 --      cm:author
- *   title:                  --      cm:title
- *   subject:                --      cm:description
- *   created:                --      cm:created
- *   comments:
- *   geo:lat:                --      cm:latitude
- *   geo:long:               --      cm:longitude
- * 
- * - * @since 3.4 - * @author Nick Burch - */ -@AlfrescoPublicApi -@Deprecated -public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter -{ - protected static Log logger = LogFactory.getLog(TikaSpringConfiguredMetadataExtracter.class); - - private Parser tikaParser; - private String tikaParserClassName; - private Class tikaParserClass; - - /** - * Injects the name of the Tika parser to use - * @param className - */ - @SuppressWarnings("unchecked") - public void setTikaParserName(String className) - { - tikaParserClassName = className; - - // Load the class - try { - tikaParserClass = (Class)Class.forName(tikaParserClassName); - setTikaParser(getParser()); - } catch(ClassNotFoundException e) { - throw new AlfrescoRuntimeException("Specified Tika Parser '" + tikaParserClassName + "' not found"); - } - } - - /** - * Injects the Tika parser to use - * @param tikaParser - */ - public void setTikaParser(Parser tikaParser) - { - this.tikaParser = tikaParser; - - // Build the mime types, updating the copy our parent - // holds for us as we go along - ArrayList mimetypes = new ArrayList(); - for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) - { - mimetypes.add( mt.toString() ); - } - super.setSupportedMimetypes(mimetypes); - } - - public TikaSpringConfiguredMetadataExtracter() - { - super(new HashSet()); - } - - /** - * Returns the Tika parser - */ - protected Parser getParser() - { - // If we were given a whole parser, return it - if(tikaParser != null) - return tikaParser; - - // Otherwise create a new one - try { - return tikaParserClass.newInstance(); - } catch (InstantiationException e) { - throw new AlfrescoRuntimeException("Unable to create specified Parser", e); - } catch (IllegalAccessException e) { - throw new AlfrescoRuntimeException("Unable to create specified Parser", e); - } - } -} diff --git a/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformServiceRegistry.java 
b/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformServiceRegistry.java index ca791d4852..d74c1a103e 100644 --- a/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformServiceRegistry.java +++ b/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformServiceRegistry.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2019 Alfresco Software Limited + * Copyright (C) 2019 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -491,6 +491,10 @@ public class LocalTransformServiceRegistry extends TransformServiceRegistryImpl public LocalTransform getLocalTransform(String sourceMimetype, long sourceSizeInBytes, String targetMimetype, Map actualOptions, String renditionName) { + if (!enabled) + { + return null; + } String name = findTransformerName(sourceMimetype, sourceSizeInBytes, targetMimetype, actualOptions, renditionName); LocalData data = getData(); Map localTransforms = data.localTransforms; diff --git a/repository/src/main/resources/alfresco/content-services-context.xml b/repository/src/main/resources/alfresco/content-services-context.xml index 95648c0748..e9ffb4d638 100644 --- a/repository/src/main/resources/alfresco/content-services-context.xml +++ b/repository/src/main/resources/alfresco/content-services-context.xml @@ -289,48 +289,7 @@ - - - - - - - - - - - - - - - - - - - ${content.metadataExtracter.pdf.overwritePolicy} - - - - - - - - - - - - EEE, d MMM yyyy HH:mm:ss Z - EEE, d MMM yy HH:mm:ss Z - d MMM yyyy HH:mm:ss Z - - - - - - - - - + @@ -596,7 +555,7 @@ - + @@ -608,31 +567,6 @@ - - - - - - - extracter.worker.JodConverter - - - - org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker - - - - - - - - - - - diff --git a/repository/src/main/resources/alfresco/subsystems/OOoJodconverter/default/jodconverter-context.xml 
b/repository/src/main/resources/alfresco/subsystems/OOoJodconverter/default/jodconverter-context.xml index ac1235ce85..b726fb1df8 100644 --- a/repository/src/main/resources/alfresco/subsystems/OOoJodconverter/default/jodconverter-context.xml +++ b/repository/src/main/resources/alfresco/subsystems/OOoJodconverter/default/jodconverter-context.xml @@ -40,15 +40,6 @@ - - - - - - - - - diff --git a/repository/src/test/java/org/alfresco/MiscContextTestSuite.java b/repository/src/test/java/org/alfresco/MiscContextTestSuite.java index 3d2c6a843d..02b7723873 100644 --- a/repository/src/test/java/org/alfresco/MiscContextTestSuite.java +++ b/repository/src/test/java/org/alfresco/MiscContextTestSuite.java @@ -72,37 +72,24 @@ import org.springframework.context.ApplicationContext; org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest.class, org.alfresco.repo.content.transform.ArchiveContentTransformerTest.class, - // Metadata tests - org.alfresco.repo.content.metadata.DWGMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.HtmlMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.MailMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.MP3MetadataExtracterTest.class, - org.alfresco.repo.content.metadata.OfficeMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.JodMetadataExtractorOOoTest.class, - org.alfresco.repo.content.metadata.PdfBoxMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.ConcurrencyPdfBoxMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.PoiMetadataExtracterTest.class, - org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest.class, - org.alfresco.repo.content.metadata.TikaAutoMetadataExtracterTest.class, - + // Metadata tests - replaced with simplified test in LocalRenditionTest and ServiceRenditionTest org.alfresco.repo.content.metadata.MappingMetadataExtracterTest.class, - // 
---------------------------------------------------------------------- - // Transformer/Rendition contexts - // - // The following tests can be extracted in a separate test suite - // if/when we decide to move the transformations in a separate component - // ---------------------------------------------------------------------- + // ---------------------------------------------------------------------- + // Transformer/Rendition contexts + // + // The following tests can be extracted in a separate test suite + // if/when we decide to move the transformations in a separate component + // ---------------------------------------------------------------------- - // [classpath:alfresco/application-context.xml, classpath:org/alfresco/repo/thumbnail/test-thumbnail-context.xml] - // some tests fail locally - on windows - org.alfresco.repo.thumbnail.ThumbnailServiceImplTest.class, + // [classpath:alfresco/application-context.xml, classpath:org/alfresco/repo/thumbnail/test-thumbnail-context.xml] + // some tests fail locally - on windows + org.alfresco.repo.thumbnail.ThumbnailServiceImplTest.class, - // [classpath:/test/alfresco/test-renditions-context.xml, classpath:alfresco/application-context.xml, - // classpath:alfresco/test/global-integration-test-context.xml] - // this does NOT passes locally - org.alfresco.repo.rendition.RenditionServicePermissionsTest.class, + // [classpath:/test/alfresco/test-renditions-context.xml, classpath:alfresco/application-context.xml, + // classpath:alfresco/test/global-integration-test-context.xml] + // this does NOT passes locally + org.alfresco.repo.rendition.RenditionServicePermissionsTest.class, // ---------------------------------------------------------------------- // Misc contexts diff --git a/repository/src/test/java/org/alfresco/filesys/repo/ContentDiskDriverTest.java b/repository/src/test/java/org/alfresco/filesys/repo/ContentDiskDriverTest.java index beabbb28d6..4b92643f0a 100644 --- 
a/repository/src/test/java/org/alfresco/filesys/repo/ContentDiskDriverTest.java +++ b/repository/src/test/java/org/alfresco/filesys/repo/ContentDiskDriverTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -2175,7 +2175,7 @@ public class ContentDiskDriverTest extends TestCase try { - tran.doInTransaction(deleteGarbageDirCB); + transactionService.getRetryingTransactionHelper().doInTransaction(deleteGarbageDirCB); } catch (Exception e) { @@ -2205,8 +2205,8 @@ public class ContentDiskDriverTest extends TestCase } - }; - tran.doInTransaction(createTestDirCB); + }; + transactionService.getRetryingTransactionHelper().doInTransaction(createTestDirCB); logger.debug("Create rule on test dir"); RetryingTransactionCallback createRuleCB = new RetryingTransactionCallback() { @@ -2235,7 +2235,7 @@ public class ContentDiskDriverTest extends TestCase compAction.addActionCondition(noCondition2); rule.setAction(compAction); - + ruleService.saveRule(testContext.testDirNodeRef, rule); logger.debug("rule created"); @@ -2243,7 +2243,7 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - tran.doInTransaction(createRuleCB, false, true); + transactionService.getRetryingTransactionHelper().doInTransaction(createRuleCB, false, true); /** * Create a file in the test directory @@ -2272,7 +2272,7 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - tran.doInTransaction(createFileCB, false, true); + transactionService.getRetryingTransactionHelper().doInTransaction(createFileCB, false, true); logger.debug("step b: write content to test file"); @@ -2294,16 +2294,16 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - tran.doInTransaction(writeFileCB, false, true); - + 
transactionService.getRetryingTransactionHelper().doInTransaction(writeFileCB, false, true); + + Thread.sleep(3000); // Need to wait for the async extract logger.debug("Step c: validate metadata has been extracted."); /** * c: check simple case of meta-data extraction has worked. */ - RetryingTransactionCallback validateFirstExtractionCB = new RetryingTransactionCallback() { - - @Override + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { public Void execute() throws Throwable { Map props = nodeService.getProperties(testContext.testNodeRef); @@ -2318,14 +2318,11 @@ public class ContentDiskDriverTest extends TestCase assertEquals("description is not correct", "This is a test file", nodeService.getProperty(testContext.testNodeRef, ContentModel.PROP_DESCRIPTION)); assertEquals("title is not correct", "ContentDiskDriverTest", nodeService.getProperty(testContext.testNodeRef, ContentModel.PROP_TITLE)); assertEquals("author is not correct", "mrogers", nodeService.getProperty(testContext.testNodeRef, ContentModel.PROP_AUTHOR)); - - - + return null; } - }; - tran.doInTransaction(validateFirstExtractionCB, false, true); - + }); + /** * d: Save the new file as an update file in the test directory @@ -2346,7 +2343,7 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - tran.doInTransaction(createUpdateFileCB, false, true); + transactionService.getRetryingTransactionHelper().doInTransaction(createUpdateFileCB, false, true); RetryingTransactionCallback writeFile2CB = new RetryingTransactionCallback() { @@ -2379,7 +2376,7 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - tran.doInTransaction(writeFile2CB, false, true); + transactionService.getRetryingTransactionHelper().doInTransaction(writeFile2CB, false, true); /** * rename the old file @@ -2394,7 +2391,7 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - 
tran.doInTransaction(renameOldFileCB, false, true); + transactionService.getRetryingTransactionHelper().doInTransaction(renameOldFileCB, false, true); /** * Check the old file has gone. @@ -2416,7 +2413,7 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - tran.doInTransaction(validateOldFileGoneCB, false, true); + transactionService.getRetryingTransactionHelper().doInTransaction(validateOldFileGoneCB, false, true); // /** // * Check metadata extraction on intermediate new file @@ -2439,7 +2436,7 @@ public class ContentDiskDriverTest extends TestCase // } // }; // -// tran.doInTransaction(validateIntermediateCB, true, true); +// transactionService.getRetryingTransactionHelper().doInTransaction(validateIntermediateCB, true, true); /** * Move the new file into place, stuff should get shuffled @@ -2454,8 +2451,8 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - - tran.doInTransaction(moveNewFileCB, false, true); + + transactionService.getRetryingTransactionHelper().doInTransaction(moveNewFileCB, false, true); logger.debug("validate update has run correctly."); RetryingTransactionCallback validateUpdateCB = new RetryingTransactionCallback() { @@ -2482,12 +2479,12 @@ public class ContentDiskDriverTest extends TestCase return null; } }; - - tran.doInTransaction(validateUpdateCB, true, true); + + transactionService.getRetryingTransactionHelper().doInTransaction(validateUpdateCB, true, true); } // testScenarioShuffleMetadataExtraction - - + + /** * ALF-12812 * @@ -2698,8 +2695,10 @@ public class ContentDiskDriverTest extends TestCase }; tran.doInTransaction(moveNewFileCB, false, true); - - logger.debug("Step c: validate metadata has been extracted."); + + Thread.sleep(3000); // Need to wait for async extract + + logger.debug("Step c: validate metadata has been extracted."); /** * c: check simple case of meta-data extraction has worked. 
*/ @@ -2732,7 +2731,7 @@ public class ContentDiskDriverTest extends TestCase }; tran.doInTransaction(validateFirstExtractionCB, false, true); - + } // testScenarioMetadataExtractionForMac public void testDirListing()throws Exception diff --git a/repository/src/test/java/org/alfresco/repo/action/ActionServiceImpl2Test.java b/repository/src/test/java/org/alfresco/repo/action/ActionServiceImpl2Test.java index fc8ef71347..9c7e3b8988 100644 --- a/repository/src/test/java/org/alfresco/repo/action/ActionServiceImpl2Test.java +++ b/repository/src/test/java/org/alfresco/repo/action/ActionServiceImpl2Test.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -26,6 +26,7 @@ package org.alfresco.repo.action; +import static java.lang.Thread.sleep; import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -351,7 +352,16 @@ public class ActionServiceImpl2Test Action action = actionService.createAction(ContentMetadataExtracter.EXECUTOR_NAME); // Execute the action actionService.executeAction(action, testNode); + return null; + } + }); + Thread.sleep(3000); // Need to wait for the async extract + + transactionHelper.doInTransaction(new RetryingTransactionCallback() + { + public Void execute() throws Throwable + { assertEquals("Gym class featuring a brown fox and lazy dog", nodeService.getProperty(testNode, ContentModel.PROP_DESCRIPTION)); return null; diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java index 5c41c6c663..d5e493616a 100644 --- 
a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java @@ -93,8 +93,6 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService"); this.mimetypeService = (MimetypeService) this.applicationContext.getBean("mimetypeService"); this.metadataExtracterRegistry = (MetadataExtracterRegistry) this.applicationContext.getBean("metadataExtracterRegistry"); - metadataExtracterRegistry.setAsyncExtractEnabled(false); - metadataExtracterRegistry.setAsyncEmbedEnabled(false); AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); authenticationComponent.setSystemUserAsCurrentUser(); @@ -124,13 +122,6 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest this.executer.setApplicableTypes(new String[] { ContentModel.TYPE_CONTENT.toString() }); } - @After - public void after() - { - metadataExtracterRegistry.setAsyncExtractEnabled(true); - metadataExtracterRegistry.setAsyncEmbedEnabled(true); - } - /** * Test that a failing embedder does not destroy the original content */ diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java index a7f46c3bc7..559bad289c 100644 --- a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java @@ -143,8 +143,6 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase this.nodeService = (NodeService) ctx.getBean("NodeService"); this.contentService = (ContentService) ctx.getBean("ContentService"); 
this.metadataExtracterRegistry = (MetadataExtracterRegistry) ctx.getBean("metadataExtracterRegistry"); - metadataExtracterRegistry.setAsyncExtractEnabled(false); - metadataExtracterRegistry.setAsyncEmbedEnabled(false); this.transactionService = (TransactionService)ctx.getBean("transactionComponent"); this.auditService = (AuditService)ctx.getBean("auditService"); @@ -209,9 +207,6 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase @Override protected void tearDown() throws Exception { - metadataExtracterRegistry.setAsyncExtractEnabled(true); - metadataExtracterRegistry.setAsyncEmbedEnabled(true); - if (AlfrescoTransactionSupport.getTransactionReadState() != TxnReadState.TXN_NONE) { fail("Test is not transaction-safe. Fix up transaction handling and re-test."); diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java index 1e62162c18..ea7c779831 100644 --- a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java @@ -47,10 +47,10 @@ import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; -import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.security.authentication.AuthenticationComponent; +import org.alfresco.repo.transaction.RetryingTransactionHelper; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentWriter; @@ -59,13 +59,14 @@ 
import org.alfresco.service.cmr.repository.NodeService; import org.alfresco.service.cmr.repository.StoreRef; import org.alfresco.service.namespace.NamespaceService; import org.alfresco.service.namespace.QName; +import org.alfresco.service.transaction.TransactionService; import org.alfresco.test_category.BaseSpringTestsCategory; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; -import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.springframework.test.context.transaction.TestTransaction; import org.springframework.transaction.annotation.Transactional; import java.io.Serializable; @@ -91,6 +92,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest private NodeService nodeService; private ContentService contentService; private MetadataExtracterRegistry registry; + private TransactionService transactionService; private StoreRef testStoreRef; private NodeRef rootNodeRef; private NodeRef nodeRef; @@ -105,8 +107,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest this.nodeService = (NodeService) this.applicationContext.getBean("nodeService"); this.contentService = (ContentService) this.applicationContext.getBean("contentService"); registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); - registry.setAsyncExtractEnabled(false); - registry.setAsyncEmbedEnabled(false); + transactionService = (TransactionService) this.applicationContext.getBean("transactionService"); AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); authenticationComponent.setSystemUserAsCurrentUser(); @@ -132,18 +133,11 @@ public class ContentMetadataExtracterTest extends BaseSpringTest this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata"); } - @After - public void after() - { - 
registry.setAsyncExtractEnabled(true); - registry.setAsyncEmbedEnabled(true); - } - /** * Test execution of the extraction itself */ @Test - public void testFromBlanks() + public void testFromBlanks() throws Exception { // Test that the action writes properties when they don't exist or are // unset @@ -156,15 +150,34 @@ public class ContentMetadataExtracterTest extends BaseSpringTest // be handled this.nodeService.setProperties(this.nodeRef, props); - // Execute the action - ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + // Make the nodeRef visible to other transactions as it will need to be in async requests + TestTransaction.flagForCommit(); + TestTransaction.end(); - this.executer.execute(action, this.nodeRef); + // Execute the action + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { + public Void execute() throws Throwable + { + ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + executer.execute(action, nodeRef); + return null; + } + }); + + Thread.sleep(3000); // Need to wait for the async extract // Check that the properties have been set - assertEquals(QUICK_TITLE, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_TITLE)); - assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION)); - assertEquals(QUICK_CREATOR, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_AUTHOR)); + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { + public Void execute() throws Throwable + { + assertEquals(QUICK_TITLE, nodeService.getProperty(nodeRef, ContentModel.PROP_TITLE)); + assertEquals(QUICK_DESCRIPTION, nodeService.getProperty(nodeRef, ContentModel.PROP_DESCRIPTION)); + assertEquals(QUICK_CREATOR, nodeService.getProperty(nodeRef, ContentModel.PROP_AUTHOR)); + 
return null; + } + }); } private static final QName PROP_UNKNOWN_1 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown1"); @@ -304,7 +317,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest * Test execution of the pragmatic approach */ @Test - public void testFromPartial() + public void testFromPartial() throws Exception { // Test that the action does not overwrite properties that are already // set @@ -318,16 +331,34 @@ public class ContentMetadataExtracterTest extends BaseSpringTest props.remove(ContentModel.PROP_DESCRIPTION); // Allow this baby this.nodeService.setProperties(this.nodeRef, props); + // Make the nodeRef visible to other transactions as it will need to be in async requests + TestTransaction.flagForCommit(); + TestTransaction.end(); + // Execute the action - ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { + public Void execute() throws Throwable + { + ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + executer.execute(action, nodeRef); + return null; + } + }); - this.executer.execute(action, this.nodeRef); + Thread.sleep(3000); // Need to wait for the async extract - // Check that the properties have been preserved - assertEquals(myTitle, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_TITLE)); - assertEquals(myCreator, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_AUTHOR)); + // Check that the properties have been preserved, but that description has been set + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { + public Void execute() throws Throwable + { + assertEquals(myTitle, nodeService.getProperty(nodeRef, ContentModel.PROP_TITLE)); + assertEquals(myCreator, nodeService.getProperty(nodeRef, 
ContentModel.PROP_AUTHOR)); - // But this one should have been set - assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION)); + assertEquals(QUICK_DESCRIPTION, nodeService.getProperty(nodeRef, ContentModel.PROP_DESCRIPTION)); + return null; + } + }); } } diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java deleted file mode 100644 index 182b4f1b06..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -package org.alfresco.repo.content.metadata; - -import static org.junit.Assert.assertEquals; - -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import org.joda.time.format.DateTimeFormat; -import org.junit.Test; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * MNT-8978 - */ -@Deprecated -public class ConcurrencyOfficeMetadataExtracterTest -{ - - private OfficeMetadataExtracter extracter = new OfficeMetadataExtracter(); - - private final Date testDate = DateTimeFormat.forPattern("yyyy-MM-dd").parseDateTime("2010-10-22").toDate(); - - @Test - public void testDateFormatting() throws Exception - { - Callable task = new Callable() - { - public Date call() throws Exception - { - return extracter.makeDate("2010-10-22"); - } - }; - - // pool with 5 threads - ExecutorService exec = Executors.newFixedThreadPool(5); - List> results = new ArrayList>(); - - // perform 10 date conversions - for (int i = 0; i < 10; i++) - { - results.add(exec.submit(task)); - } - exec.shutdown(); - - for (Future result : results) - { - assertEquals(testDate, result.get()); - } - } - -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java deleted file mode 100644 index 70e6f6e696..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * The test designed for testing the concurrent limitations in - * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)} - * - * @author amukha - */ -@Deprecated -public class ConcurrencyPdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private SlowPdfBoxMetadataExtracter extracter; - - private static final int MAX_CONCURENT_EXTRACTIONS = 5; - private static final double MAX_DOC_SIZE_MB = 0.03; - private static final int NUMBER_OF_CONCURRENT_THREADS = 11; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new SlowPdfBoxMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - - MetadataExtracterLimits pdfLimit = new MetadataExtracterLimits(); - pdfLimit.setMaxConcurrentExtractionsCount(MAX_CONCURENT_EXTRACTIONS); - pdfLimit.setMaxDocumentSizeMB(MAX_DOC_SIZE_MB); - Map limits = new HashMap<>(); - limits.put(MimetypeMap.MIMETYPE_PDF,pdfLimit); - - extracter.setMimetypeLimits(limits); - extracter.setDelay(30*NUMBER_OF_CONCURRENT_THREADS); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - - protected void testFileSpecificMetadata(String mimetype, Map properties) - { - // not required - } - - - public void testConcurrentExtractions() throws InterruptedException - { - final Map threadResults = new ConcurrentHashMap<>(); - for (int i = 0; i < NUMBER_OF_CONCURRENT_THREADS; i++) - { - new Thread(new Runnable() - { - @Override - public void run() - { - System.out.println(Thread.currentThread().getName() + " started " + System.currentTimeMillis()); - try - { - Map results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF); - System.out.println(Thread.currentThread().getName() + " results are " + results); - threadResults.put(Thread.currentThread().getName(), !results.isEmpty()); - } - catch (Exception e) - { - 
e.printStackTrace(); - } - System.out.println(Thread.currentThread().getName() + " finished " + System.currentTimeMillis()); - } - - }).start(); - } - int numWaits = NUMBER_OF_CONCURRENT_THREADS*10; - while (numWaits > 0) - { - Thread.sleep(50); - if (threadResults.size() == NUMBER_OF_CONCURRENT_THREADS) - { - break; - } - numWaits--; - } - Map counted = new HashMap<>(); - counted.put(Boolean.FALSE, 0); - counted.put(Boolean.TRUE, 0); - for (Boolean result : threadResults.values()) - { - counted.put(result, counted.get(result)+1); - } - assertEquals("Wrong number of failed extractions.", - new Integer(NUMBER_OF_CONCURRENT_THREADS - MAX_CONCURENT_EXTRACTIONS), - counted.get(Boolean.FALSE)); - assertEquals("Wrong number of successful extractions.", - new Integer(MAX_CONCURENT_EXTRACTIONS), - counted.get(Boolean.TRUE)); - } - - private class SlowPdfBoxMetadataExtracter extends PdfBoxMetadataExtracter - { - private long delay = 0; - - public void setDelay(long delay) - { - this.delay = delay; - } - - @Override - protected Map extractRaw(ContentReader reader) throws Throwable - { - Thread.sleep(delay); - Map results = super.extractRaw(reader); - System.out.println(Thread.currentThread().getName() + " results are " + results); - return results; - } - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java deleted file mode 100644 index dea0b50a6f..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.File; -import java.io.Serializable; -import java.net.URL; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; -import org.apache.tika.metadata.Metadata; - - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * @see DWGMetadataExtracter - * - * @author Nick Burch - */ -@Deprecated -public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private DWGMetadataExtracter extracter; - private static final QName TIKA_LAST_AUTHOR_TEST_PROPERTY = - QName.createQName("TikaLastAuthorTestProp"); - private static final QName TIKA_CUSTOM_TEST_PROPERTY = - QName.createQName("TikaCustomTestProp"); - private static final String TIKA_CUSTOM_KEY = "customprop1"; - - @SuppressWarnings("deprecation") - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new DWGMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - - // Attach some extra mappings, using the Tika - // metadata keys namespace - // These will be tested later - HashMap> newMap = new HashMap>( - extracter.getMapping() - ); - - Set tlaSet = new HashSet(); - tlaSet.add(TIKA_LAST_AUTHOR_TEST_PROPERTY); - Set custSet = new HashSet(); - custSet.add(TIKA_CUSTOM_TEST_PROPERTY); - newMap.put( Metadata.LAST_AUTHOR, tlaSet ); - newMap.put( TIKA_CUSTOM_KEY, custSet ); - - extracter.setMapping(newMap); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : DWGMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - /** - * Test all the supported files. 
- * Note - doesn't use extractFromMimetype - */ - public void testSupportedMimetypes() throws Exception - { - String mimetype = MimetypeMap.MIMETYPE_APP_DWG; - - for (String version : new String[] {"2004","2007","2010"}) - { - String filename = "quick" + version + ".dwg"; - URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename); - File file = new File(url.getFile()); - - Map properties = extractFromFile(file, mimetype); - - // check we got something - assertFalse("extractFromMimetype should return at least some properties, none found for " + mimetype, - properties.isEmpty()); - - // check common metadata - testCommonMetadata(mimetype, properties); - // check file-type specific metadata - testFileSpecificMetadata(mimetype, properties); - } - } - - @Override - protected boolean skipAuthorCheck(String mimetype) { return true; } - - /** - * We also provide the creation date - check that - */ - protected void testFileSpecificMetadata(String mimetype, - Map properties) - { - // Check for extra fields - assertEquals( - "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Nevin Nollop", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - - // Ensure that we can also get things which are standard - // Tika metadata properties, if we so choose to - assertTrue( - "Test Property " + TIKA_LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(TIKA_LAST_AUTHOR_TEST_PROPERTY) - ); - assertEquals( - "Test Property " + TIKA_LAST_AUTHOR_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "paolon", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(TIKA_LAST_AUTHOR_TEST_PROPERTY))); - } - - /** - * Test 2010 custom properties (ALF-16628) - */ - public void test2010CustomProperties() throws Exception - { - String mimetype = MimetypeMap.MIMETYPE_APP_DWG; - - String filename = "quick2010CustomProps.dwg"; - URL 
url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename); - File file = new File(url.getFile()); - - Map properties = extractFromFile(file, mimetype); - - // check we got something - assertFalse("extractFromMimetype should return at least some properties, none found for " + mimetype, - properties.isEmpty()); - - // check common metadata - testCommonMetadata(mimetype, properties); - - assertEquals("Custom DWG property not found", "valueforcustomprop1", properties.get(TIKA_CUSTOM_TEST_PROPERTY)); - } - -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java deleted file mode 100644 index 0047491a37..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.io.File; -import java.io.Serializable; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * @author Jesper Steen Møller - */ -@Deprecated -public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private static final String QUICK_TITLE_JAPANESE = "確認した結果を添付しますので、確認してください"; - private HtmlMetadataExtracter extracter; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new HtmlMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : HtmlMetadataExtracter.MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testHtmlExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML); - } - - public void testHtmlExtractionJapanese() throws Exception - { - String mimetype = MimetypeMap.MIMETYPE_HTML; - - File japaneseHtml = AbstractContentTransformerTest.loadNamedQuickTestFile("quick.japanese.html"); - Map properties = extractFromFile(japaneseHtml, mimetype); - - assertFalse("extractFromMimetype should return at least some properties, none found for " + mimetype, - properties.isEmpty()); - - // Title and description - assertEquals( - "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, - QUICK_TITLE_JAPANESE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE))); - } - - /** Extractor only does the usual basic three properties */ - public void testFileSpecificMetadata(String mimetype, Map properties) {} -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java deleted file mode 100644 
index 6dd91e2ccc..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -package org.alfresco.repo.content.metadata; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.Serializable; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.alfresco.repo.content.AbstractJodConverterBasedTest; -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.filestore.FileContentReader; -import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; -import org.junit.Ignore; -import org.junit.Test; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * @author Neil McErlean - * @since 3.2 SP1 - */ -@Deprecated -public class JodMetadataExtractorOOoTest extends AbstractJodConverterBasedTest -{ - protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog"; - protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog"; - protected static final String QUICK_CREATOR = "Nevin Nollop"; - protected static final String QUICK_CREATOR_EMAIL = "nevin.nollop@alfresco.com"; - protected static final String QUICK_PREVIOUS_AUTHOR = "Derek Hulley"; - - @Test - @Ignore("The test was never run and fails on remote transformer") - public void metadataExtractionUsingJodConverter() throws Exception - { - // If OpenOffice is not available then we will ignore this test (by passing it). - // This is because not all the build servers have OOo installed. 
- if (!isOpenOfficeAvailable()) - { - System.out.println("Did not run " + this.getClass().getSimpleName() + "thumbnailTransformationsUsingJodConverter" + - " because OOo is not available."); - return; - } - - Map properties = extractFromMimetype(); - assertFalse("extractFromMimetype should return at least some properties, none found", properties.isEmpty()); - String mimetype = MimetypeMap.MIMETYPE_WORD; - - // One of Creator or Author - if (properties.containsKey(ContentModel.PROP_CREATOR)) - { - assertEquals("Property " + ContentModel.PROP_CREATOR - + " not found for mimetype " + mimetype, QUICK_CREATOR, - DefaultTypeConverter.INSTANCE.convert(String.class, - properties.get(ContentModel.PROP_CREATOR))); - } else if (properties.containsKey(ContentModel.PROP_AUTHOR)) - { - assertEquals("Property " + ContentModel.PROP_AUTHOR - + " not found for mimetype " + mimetype, QUICK_CREATOR, - DefaultTypeConverter.INSTANCE.convert(String.class, - properties.get(ContentModel.PROP_AUTHOR))); - } else - { - fail("Expected one Property out of " + ContentModel.PROP_CREATOR - + " and " + ContentModel.PROP_AUTHOR - + " but found neither of them."); - } - - // Title and description - assertEquals("Property " + ContentModel.PROP_TITLE - + " not found for mimetype " + mimetype, QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties - .get(ContentModel.PROP_TITLE))); - assertEquals("Property " + ContentModel.PROP_DESCRIPTION - + " not found for mimetype " + mimetype, QUICK_DESCRIPTION, - DefaultTypeConverter.INSTANCE.convert(String.class, properties - .get(ContentModel.PROP_DESCRIPTION))); - } - - protected Map extractFromMimetype() throws Exception - { - Map properties = new HashMap(); - - // attempt to get a source file for each mimetype - File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("doc"); - if (sourceFile == null) - { - throw new FileNotFoundException("No quick.doc file found for test"); - } - - // construct a reader onto the source file 
- ContentReader sourceReader = new FileContentReader(sourceFile); - sourceReader.setMimetype(MimetypeMap.MIMETYPE_WORD); - - OpenOfficeMetadataWorker worker = (OpenOfficeMetadataWorker) ctx.getBean("extracter.worker.JodConverter"); - - Set supportedTypes = new HashSet(); - supportedTypes.add(MimetypeMap.MIMETYPE_WORD); - JodConverterMetadataExtracter extracter = new JodConverterMetadataExtracter(supportedTypes); - extracter.setMimetypeService(serviceRegistry.getMimetypeService()); - extracter.setDictionaryService(serviceRegistry.getDictionaryService()); - extracter.setWorker(worker); - - extracter.init(); - - extracter.extract(sourceReader, properties); - return properties; - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java deleted file mode 100644 index 6b475c1136..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Test for the MP3 metadata extraction from id3 tags. 
- */ -@Deprecated -public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest -{ - private MP3MetadataExtracter extracter; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = (MP3MetadataExtracter)ctx.getBean("extracter.MP3"); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : MP3MetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testMP3Extraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_MP3); - } - @Override - public void testOggExtraction() throws Exception {} - @Override - public void testFlacExtraction() throws Exception {} - @Override - public void testMP4AudioExtraction() throws Exception {} - - /** - * We don't have quite the usual metadata. Tests the descriptions one. 
- * Other tests in {@link #testFileSpecificMetadata(String, Map)} - */ - protected void testCommonMetadata(String mimetype, Map properties) - { - // Title is as normal - assertEquals( - "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, - QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE))); - // Has Author, not Creator, and is different - assertEquals( - "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Hauskaz", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - - // Description is a composite - assertContains( - "Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + QUICK_TITLE + " for mimetype " + mimetype, - QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); - // Check rest of it later - } - - /** - * Tests for various MP3 specific bits of metadata - */ - public void testFileSpecificMetadata(String mimetype, Map properties) - { - super.testFileSpecificMetadata(mimetype, properties); - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java deleted file mode 100644 index 8f60c8d5e3..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.io.File; -import java.io.Serializable; -import java.util.Collection; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * @author Derek Hulley - * @since 3.2 - */ -@Deprecated -public class MailMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private MailMetadataExtracter extracter; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new MailMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : MailMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testOutlookMsgExtraction() throws Exception - { - // Check we can find the file - File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("msg"); - assertNotNull("quick.msg files should be available from Tests", sourceFile); - - // Now test - testExtractFromMimetype(MimetypeMap.MIMETYPE_OUTLOOK_MSG); - } - - /** - * We have different things to normal, so - * do our own common tests. 
- */ - protected void testCommonMetadata(String mimetype, Map properties) - { - // Two equivalent ones - assertEquals( - "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Mark Rogers", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - assertEquals( - "Property " + ContentModel.PROP_ORIGINATOR + " not found for mimetype " + mimetype, - "Mark Rogers", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_ORIGINATOR))); - // One other common bit - assertEquals( - "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype, - "This is a quick test", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); - } - - /** - * Test the outlook specific bits - */ - protected void testFileSpecificMetadata(String mimetype, - Map properties) { - // TODO Sent Date should be a date/time as per the contentModel.xml - assertEquals( - "Property " + ContentModel.PROP_SENTDATE + " not found for mimetype " + mimetype, - "2013-01-18T13:44:20.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_SENTDATE))); - - // Addressee - assertEquals( - "Property " + ContentModel.PROP_ADDRESSEE + " not found for mimetype " + mimetype, - "mark.rogers@alfresco.com", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_ADDRESSEE))); - - // Addressees - assertTrue( - "Property " + ContentModel.PROP_ADDRESSEES + " not found for mimetype " + mimetype, - properties.get(ContentModel.PROP_ADDRESSEES) != null - ); - - Collection addresses = DefaultTypeConverter.INSTANCE.getCollection(String.class, - properties.get(ContentModel.PROP_ADDRESSEES)); - - assertTrue( - "Property " + ContentModel.PROP_ADDRESSEES + " wrong content for mimetype " + mimetype + ", mark", - addresses.contains("mark.rogers@alfresco.com")); - - assertTrue( - "Property " + 
ContentModel.PROP_ADDRESSEES + " wrong content for mimetype " + mimetype + ", mrquick", - addresses.contains("mrquick@nowhere.com")); - - // Feature: metadata extractor has normalised internet address ... from "Whizz " - assertTrue( - "Property " + ContentModel.PROP_ADDRESSEES + " wrong content for mimetype " + mimetype + ", Whizz", - addresses.contains("speedy@quick.com")); - - // Subject Line - assertEquals( - "Property " + ContentModel.PROP_SUBJECT + " not found for mimetype " + mimetype, - "This is a quick test", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_SUBJECT))); - } -} - diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java deleted file mode 100644 index 5cf47ab881..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; - - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * @see OfficeMetadataExtracter - * - * @author Jesper Steen Møller - */ -@Deprecated -public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private OfficeMetadataExtracter extracter; - - private static final QName WORD_COUNT_TEST_PROPERTY = - QName.createQName("WordCountTest"); - private static final QName LAST_AUTHOR_TEST_PROPERTY = - QName.createQName("LastAuthorTest"); - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new OfficeMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - - // Attach a couple of extra mappings - // These will be tested later - HashMap> newMap = new HashMap>( - extracter.getMapping() - ); - - Set wcSet = new HashSet(); - wcSet.add(WORD_COUNT_TEST_PROPERTY); - newMap.put( OfficeMetadataExtracter.KEY_WORD_COUNT, wcSet ); - - Set laSet = new HashSet(); - laSet.add(LAST_AUTHOR_TEST_PROPERTY); - newMap.put( OfficeMetadataExtracter.KEY_LAST_AUTHOR, laSet ); - - extracter.setMapping(newMap); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype 
should be supported: " + mimetype, supports); - } - } - - /** - * Test all the supported mimetypes - */ - public void testSupportedMimetypes() throws Exception - { - for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES) - { - testExtractFromMimetype(mimetype); - } - } - - /** - * We support all sorts of extra metadata. Check it all behaves. - */ - public void testFileSpecificMetadata(String mimetype, Map properties) { - // Test the ones with a core alfresco mapping - if(mimetype.equals(MimetypeMap.MIMETYPE_WORD)) { - assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - "2005-05-26T12:57:00.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); - assertEquals( - "Property " + ContentModel.PROP_MODIFIED + " not found for mimetype " + mimetype, - "2005-09-20T17:25:00.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_MODIFIED))); - } else if(mimetype.equals(MimetypeMap.MIMETYPE_EXCEL)) { - assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - "1996-10-14T23:33:28.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); - assertEquals( - "Property " + ContentModel.PROP_MODIFIED + " not found for mimetype " + mimetype, - "2005-09-20T18:22:32.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_MODIFIED))); - } else if(mimetype.equals(MimetypeMap.MIMETYPE_PPT)) { - assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - "1601-01-01T00:00:00.000Z", // Seriously, that's what the file says! 
- DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); - assertEquals( - "Property " + ContentModel.PROP_MODIFIED + " not found for mimetype " + mimetype, - "2005-09-20T18:23:41.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_MODIFIED))); - } - - // Now check the non-standard ones we added in at test time - assertTrue( - "Test Property " + LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(LAST_AUTHOR_TEST_PROPERTY) - ); - - if(mimetype.equals(MimetypeMap.MIMETYPE_WORD)) { - assertTrue( - "Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(WORD_COUNT_TEST_PROPERTY) - ); - - assertEquals( - "Test Property " + WORD_COUNT_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "9", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(WORD_COUNT_TEST_PROPERTY))); - assertEquals( - "Test Property " + LAST_AUTHOR_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - AbstractMetadataExtracterTest.QUICK_PREVIOUS_AUTHOR, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(LAST_AUTHOR_TEST_PROPERTY))); - } else if(mimetype.equals(MimetypeMap.MIMETYPE_EXCEL)) { - assertEquals( - "Test Property " + LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype, - AbstractMetadataExtracterTest.QUICK_PREVIOUS_AUTHOR, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(LAST_AUTHOR_TEST_PROPERTY))); - } else if(mimetype.equals(MimetypeMap.MIMETYPE_PPT)) { - assertTrue( - "Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(WORD_COUNT_TEST_PROPERTY) - ); - - assertEquals( - "Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype, - "9", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(WORD_COUNT_TEST_PROPERTY))); - assertEquals( - "Test 
Property " + LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype, - AbstractMetadataExtracterTest.QUICK_PREVIOUS_AUTHOR, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(LAST_AUTHOR_TEST_PROPERTY))); - } - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java deleted file mode 100644 index 6461cc9b38..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; - - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * @see OpenDocumentMetadataExtracter - * - * @author Derek Hulley - */ -@Deprecated -public class OpenDocumentMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private OpenDocumentMetadataExtracter extracter; - - private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"); - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new OpenDocumentMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : OpenDocumentMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - /** - * Test all the supported mimetypes - */ - public void testSupportedMimetypes() throws Exception - { - for (String mimetype : OpenDocumentMetadataExtracter.SUPPORTED_MIMETYPES) - { - testExtractFromMimetype(mimetype); - } - } - - @Override - protected boolean skipAuthorCheck(String mimetype) { return true; } - - /** - * We also provide the creation date - check that - */ - protected void testFileSpecificMetadata(String mimetype, - Map properties) - { - try - { - // Check for two cases - if(mimetype.equals("application/vnd.oasis.opendocument.text")) - { - 
assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - DATE_FORMAT.parse("2005-09-06T23:34:00.000+0000"), - DefaultTypeConverter.INSTANCE.convert(Date.class, properties.get(ContentModel.PROP_CREATED))); - } - else if(mimetype.equals("application/vnd.oasis.opendocument.graphics")) - { - assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - DATE_FORMAT.parse("2006-01-27T11:46:11.000+0000"), - DefaultTypeConverter.INSTANCE.convert(Date.class, properties.get(ContentModel.PROP_CREATED))); - } - } - catch (ParseException e) - { - fail(e.getMessage()); - } - } - -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java deleted file mode 100644 index 89865156ab..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.Serializable; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; -import org.apache.pdfbox.util.DateConverter; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter - * - * @author Jesper Steen Møller - */ -@Deprecated -public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private PdfBoxMetadataExtracter extracter; - - private static final int MAX_CONCURENT_EXTRACTIONS = 5; - private static final double MAX_DOC_SIZE_MB = 0.03; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new PdfBoxMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - - MetadataExtracterLimits pdfLimit = new MetadataExtracterLimits(); - pdfLimit.setMaxConcurrentExtractionsCount(MAX_CONCURENT_EXTRACTIONS); - pdfLimit.setMaxDocumentSizeMB(MAX_DOC_SIZE_MB); - Map limits = new HashMap<>(); - limits.put(MimetypeMap.MIMETYPE_PDF,pdfLimit); - - extracter.setMimetypeLimits(limits); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : PdfBoxMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be 
supported: " + mimetype, supports); - } - } - - public void testPdfExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF); - } - - /** - * This test method extracts metadata from an Adobe Illustrator file (which in recent versions is a pdf file). - * @since 3.5.0 - */ - public void testAiExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR); - } - - /** - * We can also return a created date - */ - protected void testFileSpecificMetadata(String mimetype, - Map properties) { - assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - "2005-05-26T19:52:58.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); - } - - /** - * Test that will show when the workaround is in place. - */ - public void testDateConversion() throws Exception { - Calendar c = DateConverter.toCalendar("D:20050526205258+01'00'"); - assertEquals(2005, c.get(Calendar.YEAR)); - assertEquals(05-1, c.get(Calendar.MONTH)); - assertEquals(26, c.get(Calendar.DAY_OF_MONTH)); - assertEquals(20, c.get(Calendar.HOUR_OF_DAY)); - assertEquals(52, c.get(Calendar.MINUTE)); - assertEquals(58, c.get(Calendar.SECOND)); - //assertEquals(0, c.get(Calendar.MILLISECOND)); - } - - public void testMaxDocumentSizeLimit() throws Exception - { - File sourceFile = AbstractContentTransformerTest.loadNamedQuickTestFile("quick-size-limit.pdf"); - - if (sourceFile == null) - { - throw new FileNotFoundException("No quick-size-limit.pdf file found for test"); - } - Map properties = extractFromFile(sourceFile, MimetypeMap.MIMETYPE_PDF); - assertTrue(properties.isEmpty()); - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java deleted file mode 100644 index 7d83993a1a..0000000000 --- 
a/repository/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java +++ /dev/null @@ -1,214 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.File; -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.filestore.FileContentReader; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * @see org.alfresco.repo.content.metadata.PoiMetadataExtracter - * - * @author Neil McErlean - * @author Dmitry Velichkevich - */ -@Deprecated -public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private static final int MINIMAL_EXPECTED_PROPERTIES_AMOUNT = 3; - - private static final String ALL_MIMETYPES_FILTER = "*"; - - private static final String PROBLEM_FOOTNOTES_DOCUMENT_NAME = "problemFootnotes2.docx"; - - private PoiMetadataExtracter extracter; - - private Long extractionTimeWithDefaultFootnotesLimit; - private Long extractionTimeWithLargeFootnotesLimit; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new PoiMetadataExtracter(); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - } - - @Override - protected void tearDown() throws Exception - { - super.tearDown(); - } - - @Override - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : PoiMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testOffice2007Extraction() throws Exception - { - for (String mimetype : PoiMetadataExtracter.SUPPORTED_MIMETYPES) - { - testExtractFromMimetype(mimetype); - } - } - - @Override - protected boolean skipDescriptionCheck(String mimetype) - { - // Our 3 OpenOffice 07 quick files have no description properties. - return true; - } - - - @Override - protected void testFileSpecificMetadata(String mimetype, - Map properties) - { - // This test class is testing 3 files: quick.docx, quick.xlsx & quick.pptx. - // Their created times are hard-coded here for checking. - // Of course this means that if the files are updated, the test will break - // but those files are rarely modified - only added to. 
- if (MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING.equals(mimetype)) - { - checkFileCreationDate(mimetype, properties, "2010-01-06T17:32:00.000Z"); - } - else if (MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET.equals(mimetype)) - { - checkFileCreationDate(mimetype, properties, "1996-10-14T23:33:28.000Z"); - } - else if (MimetypeMap.MIMETYPE_OPENXML_PRESENTATION.equals(mimetype)) - { - // Extraordinary! This document predates Isaac Newton's Principia Mathematica by almost a century. ;) - checkFileCreationDate(mimetype, properties, "1601-01-01T00:00:00.000Z"); - } - } - - private void checkFileCreationDate(String mimetype, Map properties, String date) - { - assertEquals("Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, date, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); - } - - /** - * Tests that metadata extraction from a somewhat corrupt file with several - * thousand footnotes times out properly. - * - * @throws Exception - */ - public void testProblemFootnotes() throws Exception - { - long timeoutMs = 2000; - - MetadataExtracterLimits limits = new MetadataExtracterLimits(); - limits.setTimeoutMs(timeoutMs); - HashMap mimetypeLimits = - new HashMap(1); - mimetypeLimits.put(ALL_MIMETYPES_FILTER, limits); - ((PoiMetadataExtracter) getExtracter()).setMimetypeLimits(mimetypeLimits); - - File sourceFile = AbstractContentTransformerTest.loadNamedQuickTestFile("problemFootnotes.docx"); - - Map properties = new HashMap(); - // construct a reader onto the source file - ContentReader sourceReader = new FileContentReader(sourceFile); - sourceReader.setMimetype(MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING); - - long startTime = System.currentTimeMillis(); - - getExtracter().extract(sourceReader, properties); - - long extractionTime = System.currentTimeMillis() - startTime; - - assertTrue("Metadata extraction took (" + extractionTime + "ms) " + - "but should have failed with a timeout at " + timeoutMs 
+ "ms", - extractionTime < (timeoutMs + 100)); // bit of wiggle room for logging, cleanup, etc. - assertFalse("Reader was not closed", sourceReader.isChannelOpen()); - } - - /** - * Test for MNT-577: Alfresco is running 100% CPU for over 10 minutes while extracting metadata for Word office document - * - * @throws Exception - */ - public void testFootnotesLimitParameterUsingDefault() throws Exception - { - PoiMetadataExtracter extractor = (PoiMetadataExtracter) getExtracter(); - - File sourceFile = AbstractContentTransformerTest.loadNamedQuickTestFile(PROBLEM_FOOTNOTES_DOCUMENT_NAME); - ContentReader sourceReader = new FileContentReader(sourceFile); - sourceReader.setMimetype(MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING); - - Map properties = new HashMap(); - long startTime = System.currentTimeMillis(); - extractor.extract(sourceReader, properties); - extractionTimeWithDefaultFootnotesLimit = System.currentTimeMillis() - startTime; - - assertExtractedProperties(properties); - if (extractionTimeWithLargeFootnotesLimit != null) - { - assertTrue("The second metadata extraction operation must be longer!", extractionTimeWithLargeFootnotesLimit > extractionTimeWithDefaultFootnotesLimit); - } - } - - /** - * Asserts extracted properties. 
At least {@link PoiMetadataExtracterTest#MINIMAL_EXPECTED_PROPERTIES_AMOUNT} properties are expected: - * {@link ContentModel#PROP_TITLE}, {@link ContentModel#PROP_AUTHOR} and {@link ContentModel#PROP_CREATED} - * - * @param properties - {@link Map}<{@link QName}, {@link Serializable}> instance which contains all extracted properties - */ - private void assertExtractedProperties(Map properties) - { - assertNotNull("Properties were not extracted at all!", properties); - assertFalse("Extracted properties are empty!", properties.isEmpty()); - assertTrue(("Expected 3 extracted properties but only " + properties.size() + " have been extracted!"), properties.size() >= MINIMAL_EXPECTED_PROPERTIES_AMOUNT); - assertTrue(("'" + ContentModel.PROP_TITLE + "' property is missing!"), properties.containsKey(ContentModel.PROP_TITLE)); - assertTrue(("'" + ContentModel.PROP_AUTHOR + "' property is missing!"), properties.containsKey(ContentModel.PROP_AUTHOR)); - assertTrue(("'" + ContentModel.PROP_CREATED + "' property is missing!"), properties.containsKey(ContentModel.PROP_CREATED)); - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java deleted file mode 100644 index e129dac642..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java +++ /dev/null @@ -1,272 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.io.Serializable; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; -import org.joda.time.DateTime; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * Test for the RFC822 (imap/mbox) extractor - */ -@Deprecated -public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private RFC822MetadataExtracter extracter; - - private static final QName MESSAGE_FROM_TEST_PROPERTY = - QName.createQName("MessageToTest"); - private static final QName MESSAGE_TO_TEST_PROPERTY = - QName.createQName("MessageFromTest"); - private static final QName MESSAGE_CC_TEST_PROPERTY = - QName.createQName("MessageCCTest"); - - @Override - public void setUp() throws Exception - { - super.setUp(); - - // Ask Spring for the extractor, so it - // gets its date formats populated - extracter = (RFC822MetadataExtracter)ctx.getBean("extracter.RFC822"); - - // Attach a couple of extra mappings - // These will be tested later - HashMap> newMap = new HashMap>( - extracter.getMapping() - ); - - Set fromSet = new HashSet(); - fromSet.add(MESSAGE_FROM_TEST_PROPERTY); - fromSet.addAll( extracter.getCurrentMapping().get(RFC822MetadataExtracter.KEY_MESSAGE_FROM) ); - newMap.put( RFC822MetadataExtracter.KEY_MESSAGE_FROM, fromSet ); - - Set toSet = new HashSet(); - toSet.add(MESSAGE_TO_TEST_PROPERTY); - toSet.addAll( extracter.getCurrentMapping().get(RFC822MetadataExtracter.KEY_MESSAGE_TO) ); - newMap.put( RFC822MetadataExtracter.KEY_MESSAGE_TO, toSet ); - - Set ccSet = new HashSet(); - ccSet.add(MESSAGE_CC_TEST_PROPERTY); - ccSet.addAll( extracter.getCurrentMapping().get(RFC822MetadataExtracter.KEY_MESSAGE_CC) ); - newMap.put( RFC822MetadataExtracter.KEY_MESSAGE_CC, ccSet ); - - extracter.setMapping(newMap); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - // RFC822 has a non-standard date format. 1. 
EEE, d MMM yyyy HH:mm:ss Z - public void testHasDateFormats1() throws Exception - { - assertEquals("16 Aug 2012 15:13:29 GMT", extracter.makeDate("Thu, 16 Aug 2012 08:13:29 -0700").toGMTString()); - } - - // RFC822 has a non-standard date format. 2. EEE, d MMM yy HH:mm:ss Z - public void testHasDateFormats2() throws Exception - { - assertEquals("16 Aug 2012 15:13:29 GMT", extracter.makeDate("Thu, 16 Aug 12 08:13:29 -0700").toGMTString()); - } - - // RFC822 has a non-standard date format. 3. d MMM yyyy HH:mm:ss Z - public void testHasDateFormats3() throws Exception - { - assertEquals("16 Aug 2012 15:13:29 GMT", extracter.makeDate("16 Aug 2012 08:13:29 -0700").toGMTString()); - } - - // Check time zone names are ignored - these are not handled by org.joda.time.format.DateTimeFormat - public void testHasDateFormatsZoneName() throws Exception - { - assertEquals("16 Aug 2012 15:13:29 GMT", extracter.makeDate("Thu, 16 Aug 2012 08:13:29 -0700 (PDT)").toGMTString()); - } - - public void testJodaFormats() - { - String[][] testData = new String[][] - { - { "a1", "EEE, d MMM yyyy HH:mm:ss Z", "Thu, 16 Aug 12 08:13:29 -0700", "Thu Aug 18 15:13:29 GMT 12", "0"}, // gets the year wrong - { "a2a", "EEE, d MMM yy HH:mm:ss Z", "Thu, 16 Aug 12 08:13:29 -0700", "Thu Aug 16 15:13:29 GMT 2012", "20"}, - { "a2b", "EEE, d MMM yy HH:mm:ss Z", "Wed, 16 Aug 50 08:13:29 -0700", "Wed Aug 16 15:13:29 GMT 1950", "19"}, - { "a2c", "EEE, d MMM yy HH:mm:ss Z", "Sun, 16 Aug 20 08:13:29 -0700", "Sun Aug 16 15:13:29 GMT 2020", "20"}, - { "a3", "d MMM yyyy HH:mm:ss Z", "Thu, 16 Aug 12 08:13:29 -0700", null, null}, - - { "b1", "EEE, d MMM yyyy HH:mm:ss Z", "Thu, 16 Aug 2012 08:13:29 -0700", "Thu Aug 16 15:13:29 GMT 2012", "20"}, - { "b2a", "EEE, d MMM yy HH:mm:ss Z", "Thu, 16 Aug 2012 08:13:29 -0700", "Thu Aug 16 15:13:29 GMT 2012", "20"}, - { "b2b", "EEE, d MMM yy HH:mm:ss Z", "Wed, 16 Aug 1950 08:13:29 -0700", "Wed Aug 16 15:13:29 GMT 1950", "19"}, - { "b2c", "EEE, d MMM yy HH:mm:ss Z", "Sun, 16 Aug 
2020 08:13:29 -0700", "Sun Aug 16 15:13:29 GMT 2020", "20"}, - { "b3", "d MMM yyyy HH:mm:ss Z", "Thu, 16 Aug 2012 08:13:29 -0700", null, "20"}, - - { "c1", "EEE, d MMM yyyy HH:mm:ss Z", "16 Aug 2012 08:13:29 -0700", null, null}, - { "c2", "EEE, d MMM yy HH:mm:ss Z", "16 Aug 2012 08:13:29 -0700", null, null}, - { "c3a", "d MMM yyyy HH:mm:ss Z", "16 Aug 2012 08:13:29 -0700", "Thu Aug 16 15:13:29 GMT 2012", "20"}, - { "c3b", "d MMM yyyy HH:mm:ss Z", "16 Aug 1950 08:13:29 -0700", "Wed Aug 16 15:13:29 GMT 1950", "19"}, - { "c3c", "d MMM yyyy HH:mm:ss Z", "16 Aug 2020 08:13:29 -0700", "Sun Aug 16 15:13:29 GMT 2020", "20"}, - }; - - for (String[] data: testData) - { - String format = data[1]; - String dateStr = data[2]; - String context = data[0]+") \""+format+"\", \""+dateStr+"\""; - String expected = data[3]; - int centuryOfEra = data[4] == null ? -1 : new Integer(data[4]); - - // Need to set pivot year so it still works in 20 years time :) - DateTimeFormatter dateTimeFormater = DateTimeFormat.forPattern(format).withPivotYear(2000); - DateTime dateTime = null; - try - { - dateTime = dateTimeFormater.parseDateTime(dateStr); - } - catch (IllegalArgumentException e) - { - } - - String actual = dateTime == null ? 
null : dateTime.toDate().toString(); - assertEquals(context, expected, actual); - - if (dateTime != null) - { - assertEquals(context, centuryOfEra, dateTime.getCenturyOfEra()); - } - } - } - - public void testSupports() throws Exception - { - for (String mimetype : RFC822MetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testEmailExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_RFC822); - } - - public void testSpanishEmailExtraction() throws Exception - { - File spanishEml = AbstractContentTransformerTest.loadNamedQuickTestFile("quick.spanish.eml"); - Map properties = extractFromFile(spanishEml, MimetypeMap.MIMETYPE_RFC822); - testCommonMetadata(MimetypeMap.MIMETYPE_RFC822, properties); - } - - /** - * We have no author, and have the same title and description - */ - protected void testCommonMetadata(String mimetype, - Map properties) { - assertEquals( - "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, - QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE))); - assertEquals( - "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype, - QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); - } - - /** - * Test our extra IMAP properties - */ - public void testFileSpecificMetadata(String mimetype, Map properties) { - // Check the other cm: ones - assertEquals( - "Property " + ContentModel.PROP_ORIGINATOR + " not found for mimetype " + mimetype, - QUICK_CREATOR + " <" + QUICK_CREATOR_EMAIL + ">", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_ORIGINATOR))); - // assertEquals( - // "Property " + ContentModel.PROP_SENTDATE + " not found for mimetype " + mimetype, - // 
"2004-06-04T13:23:22.000+01:00", - // DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_SENTDATE))); - - // Check some imap: ones - assertEquals( - "Test Property " + MESSAGE_FROM_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "Nevin Nollop ", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(MESSAGE_FROM_TEST_PROPERTY))); - assertEquals( - "Test Property " + MESSAGE_FROM_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "Nevin Nollop ", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(MESSAGE_FROM_TEST_PROPERTY))); - assertEquals( - "Test Property " + MESSAGE_TO_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "Nevin Nollop ", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(MESSAGE_TO_TEST_PROPERTY))); - - // Finally check our non-standard ones we added in at test time - assertTrue( - "Test Property " + MESSAGE_FROM_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(MESSAGE_FROM_TEST_PROPERTY) - ); - assertTrue( - "Test Property " + MESSAGE_TO_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(MESSAGE_TO_TEST_PROPERTY) - ); - assertTrue( - "Test Property " + MESSAGE_CC_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(MESSAGE_CC_TEST_PROPERTY) - ); - - assertEquals( - "Test Property " + MESSAGE_FROM_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "Nevin Nollop ", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(MESSAGE_FROM_TEST_PROPERTY))); - assertEquals( - "Test Property " + MESSAGE_TO_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "Nevin Nollop ", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(MESSAGE_TO_TEST_PROPERTY))); - assertEquals( - "Test Property " + MESSAGE_CC_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - "Nevin Nollop ", - DefaultTypeConverter.INSTANCE.convert(String.class, 
properties.get(MESSAGE_CC_TEST_PROPERTY))); - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java deleted file mode 100644 index ef0415b0c5..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -/* - * Copyright (C) 2005 - 2020 Jesper Steen Møller - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.content.metadata; - -import java.io.Serializable; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.NamespaceService; -import org.alfresco.service.namespace.QName; - -/** - * @deprecated OOTB extractors are being moved to T-Engines. - * - * Test for the audio metadata extraction. - */ -@Deprecated -public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private TikaAudioMetadataExtracter extracter; - private static final String ARTIST = "Hauskaz"; - private static final String ALBUM = "About a dog and a fox"; - private static final String GENRE = "Foxtrot"; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = (TikaAudioMetadataExtracter)ctx.getBean("extracter.Audio"); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : TikaAudioMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testOggExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_VORBIS); - } - public void testFlacExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_FLAC); - 
} - public void testMP4AudioExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_AUDIO_MP4); - } - - /** - * We don't have quite the usual metadata. Tests the descriptions one. - * Other tests in {@link #testFileSpecificMetadata(String, Map)} - */ - protected void testCommonMetadata(String mimetype, Map properties) - { - // Title is as normal - assertEquals( - "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, - QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE))); - // Has Author, not Creator, and is different - assertEquals( - "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Hauskaz", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - - // Description is a composite - assertContains( - "Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + QUICK_TITLE + " for mimetype " + mimetype, - QUICK_TITLE, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); - // Check rest of it later - } - - /** - * Tests for various Audio specific bits of metadata - */ - public void testFileSpecificMetadata(String mimetype, Map properties) { - QName album = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "album"); - assertEquals( - "Property " + album + " not found for mimetype " + mimetype, - ALBUM, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(album))); - - QName artist = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "artist"); - assertEquals( - "Property " + artist + " not found for mimetype " + mimetype, - ARTIST, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(artist))); - - QName genre = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "genre"); - assertEquals( - "Property " + genre + " not found for mimetype " + mimetype, - GENRE, - 
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(genre))); - - QName releaseDate = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "releaseDate"); - assertEquals( - "Property " + releaseDate + " not found for mimetype " + mimetype, - "2009-01-01T00:00:00.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(releaseDate))); - - QName channels = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "channelType"); - assertEquals( - "Property " + channels + " not found for mimetype " + mimetype, - "Stereo", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(channels))); - - - // Description is a composite - check the artist part - assertContains( - "Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype, - ARTIST, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION))); - } -} diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java deleted file mode 100644 index 3d5ceb0d29..0000000000 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java +++ /dev/null @@ -1,399 +0,0 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ -package org.alfresco.repo.content.metadata; - -import java.io.File; -import java.io.Serializable; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.filestore.FileContentReader; -import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.NamespaceService; -import org.alfresco.service.namespace.QName; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.microsoft.OfficeParser; -import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; -import org.apache.tika.parser.mp3.Mp3Parser; -import org.apache.tika.parser.odf.OpenDocumentParser; - - -/** - * @deprecated OOTB extractors are being moved to T-Engines. 
- * - * @see TikaAutoMetadataExtracter - * - * @author Nick Burch - */ -@Deprecated -public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest -{ - private static Log logger = LogFactory.getLog(TikaAutoMetadataExtracterTest.class); - - private TikaAutoMetadataExtracter extracter; - private static final QName TIKA_MIMETYPE_TEST_PROPERTY = - QName.createQName("TikaMimeTypeTestProp"); - - @Override - public void setUp() throws Exception - { - super.setUp(); - - TikaConfig config = (TikaConfig)ctx.getBean("tikaConfig"); - extracter = new TikaAutoMetadataExtracter(config); - extracter.setDictionaryService(dictionaryService); - extracter.register(); - - // Attach some extra mappings, using the Tika - // metadata keys namespace - // These will be tested later - HashMap> newMap = new HashMap>( - extracter.getMapping() - ); - - Set tlaSet = new HashSet(); - tlaSet.add(TIKA_MIMETYPE_TEST_PROPERTY); - newMap.put( Metadata.CONTENT_TYPE, tlaSet ); - - extracter.setMapping(newMap); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - ArrayList mimeTypes = new ArrayList(); - for (Parser p : new Parser[] { - new OfficeParser(), new OpenDocumentParser(), - new Mp3Parser(), new OOXMLParser() - }) { - Set mts = p.getSupportedTypes(new ParseContext()); - for (MediaType mt : mts) - { - mimeTypes.add(mt.toString()); - } - } - - for (String mimetype : mimeTypes) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - /** - * Test several different files - * Note - doesn't use extractFromMimetype - */ - public void testSupportedMimetypes() throws Exception - { - String[] testFiles = new String[] { - ".doc", ".docx", ".xls", ".xlsx", - ".ppt", ".pptx", - //".vsd", // Our sample file lacks suitable metadata - "2010.dwg", - 
"2003.mpp", "2007.mpp", - ".pdf", - ".odt", - }; - - AutoDetectParser ap = new AutoDetectParser(); - for (String fileBase : testFiles) - { - String filename = "quick" + fileBase; - URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename); - File file = new File(url.getFile()); - - // Cheat and ask Tika for the mime type! - Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, filename); - MediaType mt = ap.getDetector().detect(TikaInputStream.get(file), metadata); - String mimetype = mt.toString(); - - if (logger.isDebugEnabled()) - { - logger.debug("Detected mimetype " + mimetype + " for quick test file " + filename); - } - - // Have it processed - Map properties = extractFromFile(file, mimetype); - - // check we got something - assertFalse("extractFromMimetype should return at least some properties, " + - "none found for " + mimetype + " - " + filename, - properties.isEmpty()); - - // check common metadata - testCommonMetadata(mimetype, properties); - // check file-type specific metadata - testFileSpecificMetadata(mimetype, properties); - } - } - - /** - * Test MNT-15219 Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may - * cause OutOfMemory in Tika Note - doesn't use extractFromMimetype - */ - public void testParsingOfShapesInXLSXFiles() throws Exception - { - AutoDetectParser ap = new AutoDetectParser(); - - String filename = "dmsu1332-reproduced.xlsx"; - URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename); - File file = new File(url.getFile()); - - // Cheat and ask Tika for the mime type! 
- Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, filename); - MediaType mt = ap.getDetector().detect(TikaInputStream.get(file), metadata); - String mimetype = mt.toString(); - - if (logger.isDebugEnabled()) - { - logger.debug("Detected mimetype " + mimetype + " for quick test file " + filename); - } - - // Have it processed - // see MNT-15219 and REPO-3251 - Map properties = extractFromFile(file, mimetype); - - // check we got something - assertFalse("extractFromMimetype should return at least some properties, none found for " + mimetype + " - " + filename, - properties.isEmpty()); - - if (properties.containsKey(ContentModel.PROP_AUTHOR)) - { - assertEquals("Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Udintsev, Anton (external - Project)", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - } - else - { - fail("Expected one property out of " + ContentModel.PROP_CREATOR + " and " + ContentModel.PROP_AUTHOR + " but found neither of them for " - + mimetype); - } - - // Ensure that we can also get things which are standard - // Tika metadata properties, if we so choose to - assertTrue("Test Property " + TIKA_MIMETYPE_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(TIKA_MIMETYPE_TEST_PROPERTY)); - assertEquals("Test Property " + TIKA_MIMETYPE_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - mimetype, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(TIKA_MIMETYPE_TEST_PROPERTY))); - } - - @Override - protected boolean skipAuthorCheck(String mimetype) { return true; } - - @Override - protected boolean skipDescriptionCheck(String mimetype) - { - if(mimetype.endsWith("/ogg")) - { - return true; - } - return false; - } - - /** - * We also provide the creation date - check that - */ - protected void testFileSpecificMetadata(String mimetype, - Map properties) - { - - // Check for extra fields - // 
Author isn't there for the OpenDocument ones - if(mimetype.indexOf(".oasis.") == -1 && !mimetype.endsWith("/ogg") && !mimetype.endsWith("dwg")) - { - assertEquals( - "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Nevin Nollop", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - } - - // Ensure that we can also get things which are standard - // Tika metadata properties, if we so choose to - assertTrue( - "Test Property " + TIKA_MIMETYPE_TEST_PROPERTY + " not found for mimetype " + mimetype, - properties.containsKey(TIKA_MIMETYPE_TEST_PROPERTY) - ); - assertEquals( - "Test Property " + TIKA_MIMETYPE_TEST_PROPERTY + " incorrect for mimetype " + mimetype, - mimetype, - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(TIKA_MIMETYPE_TEST_PROPERTY))); - - // Extra media checks for music formats - if(mimetype.startsWith("audio")) - { - assertEquals( - "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, - "Hauskaz", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); - QName artistQ = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "artist"); - assertEquals( - "Property " + artistQ + " not found for mimetype " + mimetype, - "Hauskaz", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(artistQ))); - } - } - - /** - * We don't have explicit extractors for most image and video formats. 
- * Instead, these will be handled by the Auto Tika Parser, and - * this test ensures that they are - */ - @SuppressWarnings("deprecation") -public void testImageVideo() throws Throwable { - Map p; - - // Image - p = openAndCheck(".jpg", "image/jpeg"); - assertEquals("409 pixels", p.get("Image Width")); - assertEquals("92 pixels", p.get("Image Height")); - assertEquals("8 bits", p.get("Data Precision")); - - p = openAndCheck(".gif", "image/gif"); - assertEquals("409", p.get("width")); - assertEquals("92", p.get("height")); - - p = openAndCheck(".png", "image/png"); - assertEquals("409", p.get("width")); - assertEquals("92", p.get("height")); - assertEquals("8 8 8", p.get("Data BitsPerSample")); - assertEquals("none", p.get("Transparency Alpha")); - - p = openAndCheck(".bmp", "image/bmp"); - assertEquals("409", p.get("width")); - assertEquals("92", p.get("height")); - assertEquals("8 8 8", p.get("Data BitsPerSample")); - - // Image with wrong tiff:Width property. see MNT-13920 - p = openAndCheck("SizeSample.jpg", "image/jpeg"); - // Check raw EXIF properties - assertEquals("1535 pixels", p.get("Image Width")); - assertEquals("367 pixels", p.get("Image Height")); - - // Map and check - Map propsJPG = new HashMap(); - ContentReader readerJPG = new FileContentReader(open("SizeSample.jpg")); - readerJPG.setMimetype("image/jpeg"); - extracter.extract(readerJPG, propsJPG); - assertEquals(1535, propsJPG.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelXDimension"))); - assertEquals(367, propsJPG.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelYDimension"))); - - - // Geo tagged image - p = openAndCheck("GEO.jpg", "image/jpeg"); - // Check raw EXIF properties - assertEquals("100 pixels", p.get("Image Width")); - assertEquals("68 pixels", p.get("Image Height")); - assertEquals("8 bits", p.get("Data Precision")); - // Check regular Tika properties - assertEquals(QUICK_TITLE, p.get(Metadata.COMMENT)); - assertEquals("canon-55-250, moscow-birds, 
serbor", p.get(Metadata.SUBJECT)); - assertTrue(Arrays.equals(new String[] { "canon-55-250", "moscow-birds", "serbor" }, (String[]) p.get("dc:subject"))); - // Check namespace'd Tika properties - assertEquals("12.54321", p.get("geo:lat")); - assertEquals("-54.1234", p.get("geo:long")); - assertEquals("100", p.get("tiff:ImageWidth")); - assertEquals("68", p.get("tiff:ImageLength")); - assertEquals("Canon", p.get("tiff:Make")); - assertEquals("5.6", p.get("exif:FNumber")); - - // Map and check - Map properties = new HashMap(); - ContentReader reader = new FileContentReader(open("GEO.jpg")); - reader.setMimetype("image/jpeg"); - extracter.extract(reader, properties); - // Check the geo bits - assertEquals(12.54321, properties.get(ContentModel.PROP_LATITUDE)); - assertEquals(-54.1234, properties.get(ContentModel.PROP_LONGITUDE)); - // Check the exif bits - assertEquals(100, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelXDimension"))); - assertEquals(68, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "pixelYDimension"))); - assertEquals(0.000625, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "exposureTime"))); - assertEquals(5.6, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "fNumber"))); - assertEquals(false, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "flash"))); - assertEquals(194.0, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "focalLength"))); - assertEquals("400", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "isoSpeedRatings"))); - assertEquals("Canon", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "manufacturer"))); - assertEquals("Canon EOS 40D", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "model"))); - assertEquals("Adobe Photoshop CS3 Macintosh", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "software"))); - 
assertEquals(null, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "orientation"))); - assertEquals(240.0, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "xResolution"))); - assertEquals(240.0, properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "yResolution"))); - assertEquals("Inch", properties.get(QName.createQName(NamespaceService.EXIF_MODEL_1_0_URI, "resolutionUnit"))); - } - private File open(String fileBase) throws Throwable { - String filename = "quick" + fileBase; - URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename); - File file = new File(url.getFile()); - assertTrue(file.exists()); - return file; - } - private Map openAndCheck(String fileBase, String expMimeType) throws Throwable { - // Get the mimetype via the MimeTypeMap - // (Uses Tika internally for the detection) - File file = open(fileBase); - ContentReader detectReader = new FileContentReader(file); - String mimetype = mimetypeMap.guessMimetype(fileBase, detectReader); - - assertEquals("Wrong mimetype for " + fileBase, mimetype, expMimeType); - - // Ensure the Tika Auto parser actually handles this - assertTrue("Mimetype should be supported but isn't: " + mimetype, extracter.isSupported(mimetype)); - - // Now create our proper reader - ContentReader sourceReader = new FileContentReader(file); - sourceReader.setMimetype(mimetype); - - // And finally do the properties extraction - return extracter.extractRaw(sourceReader); - } -} diff --git a/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionIntegrationTest.java b/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionIntegrationTest.java index 09c52ba170..590e0098f0 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionIntegrationTest.java +++ b/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionIntegrationTest.java @@ -28,12 +28,15 @@ package 
org.alfresco.repo.rendition2; import junit.framework.AssertionFailedError; import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; +import org.alfresco.repo.content.metadata.MetadataExtracter; import org.alfresco.repo.content.transform.LocalTransformServiceRegistry; import org.alfresco.repo.security.authentication.AuthenticationUtil; import org.alfresco.repo.thumbnail.ThumbnailRegistry; import org.alfresco.repo.transaction.RetryingTransactionHelper; import org.alfresco.service.cmr.rendition.RenditionService; import org.alfresco.service.cmr.repository.ChildAssociationRef; +import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.MimetypeService; @@ -59,7 +62,9 @@ import org.springframework.util.ResourceUtils; import java.io.File; import java.io.FileNotFoundException; +import java.io.Serializable; import java.util.Collections; +import java.util.Map; import static java.lang.Thread.sleep; import static org.alfresco.model.ContentModel.PROP_CONTENT; @@ -118,6 +123,9 @@ public abstract class AbstractRenditionIntegrationTest extends BaseSpringTest @Autowired protected TransformationOptionsConverter converter; + @Autowired + protected AsynchronousExtractor asynchronousExtractor; + static String PASSWORD = "password"; protected static final String ADMIN = "admin"; @@ -247,7 +255,7 @@ public abstract class AbstractRenditionIntegrationTest extends BaseSpringTest fail("The " + renditionName + " rendition should NOT be supported for " + testFileName); } } - catch(UnsupportedOperationException e) + catch (UnsupportedOperationException e) { if (expectedToPass) { @@ -256,6 +264,27 @@ public abstract class AbstractRenditionIntegrationTest extends BaseSpringTest } } + protected void checkExtract(String testFileName, boolean 
expectedToPass) + { + try + { + NodeRef sourceNodeRef = createSource(ADMIN, testFileName); + extract(ADMIN, sourceNodeRef); + waitForExtract(ADMIN, sourceNodeRef, true); + if (!expectedToPass) + { + fail("The extract of metadata should NOT be supported for " + testFileName); + } + } + catch (AssertionFailedError e) + { + if (expectedToPass) + { + fail("The extract of metadata SHOULD be supported for " + testFileName); + } + } + } + // Creates a new source node as the given user in its own transaction. protected NodeRef createSource(String user, String testFileName) { @@ -322,12 +351,31 @@ public abstract class AbstractRenditionIntegrationTest extends BaseSpringTest }), user); } + // Requests a new metadata extract as the given user in its own transaction. + protected void extract(String user, NodeRef sourceNode) + { + AuthenticationUtil.runAs((AuthenticationUtil.RunAsWork) () -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + extract(sourceNode); + return null; + }), user); + } + // Requests a new rendition as the current user in the current transaction. private void render(NodeRef sourceNodeRef, String renditionName) { renditionService2.render(sourceNodeRef, renditionName); } + // Requests a new metadata extract as the current user in the current transaction. + private void extract(NodeRef sourceNodeRef) + { + ContentReader reader = contentService.getReader(sourceNodeRef, ContentModel.PROP_CONTENT); + asynchronousExtractor.extract(sourceNodeRef, reader, MetadataExtracter.OverwritePolicy.PRAGMATIC, + Collections.emptyMap(), Collections.emptyMap()); + } + // As a given user waitForRendition for a rendition to appear. Creates new transactions to do this. 
protected NodeRef waitForRendition(String user, NodeRef sourceNodeRef, String renditionName, boolean shouldExist) throws AssertionFailedError { @@ -346,6 +394,24 @@ public abstract class AbstractRenditionIntegrationTest extends BaseSpringTest } } + // As a given user wait for a metadata extract to modify the node. Creates new transactions to do this. + protected void waitForExtract(String user, NodeRef sourceNodeRef, boolean nodePropsShouldChange) throws AssertionFailedError + { + try + { + AuthenticationUtil.runAs(() -> waitForExtract(sourceNodeRef, nodePropsShouldChange), user); + } + catch (RuntimeException e) + { + Throwable cause = e.getCause(); + if (cause instanceof AssertionFailedError) + { + throw (AssertionFailedError)cause; + } + throw e; + } + } + // As the current user waitForRendition for a rendition to appear. Creates new transactions to do this. private NodeRef waitForRendition(NodeRef sourceNodeRef, String renditionName, boolean shouldExist) throws InterruptedException { @@ -375,6 +441,38 @@ public abstract class AbstractRenditionIntegrationTest extends BaseSpringTest } } + // As the current user wait for a metadata extract to modify the node (modified differs from created). Creates new transactions to do this. + private Object waitForExtract(NodeRef sourceNodeRef, boolean nodePropsShouldChange) throws InterruptedException + { + long maxMillis = 5000; + boolean nodeModified = true; + for (int i = (int)(maxMillis / 1000); i >= 0; i--) + { + // Must create a new transaction in order to see changes that take place after this method started.
+ nodeModified = transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + Serializable created = nodeService.getProperty(sourceNodeRef, ContentModel.PROP_CREATED); + Serializable modified = nodeService.getProperty(sourceNodeRef, ContentModel.PROP_MODIFIED); + return !created.equals(modified); + }, true, true); + if (nodeModified) + { + break; + } + logger.debug("waitForExtract sleep "+i); + sleep(1000); + } + if (nodePropsShouldChange) + { + assertTrue("Extract failed", nodeModified); + } + else + { + assertFalse("Extract did not fail", nodeModified); + } + return null; + } + protected String getTestFileName(String sourceMimetype) throws FileNotFoundException { String extension = mimetypeMap.getExtension(sourceMimetype); diff --git a/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionTest.java b/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionTest.java index 5ea0714e6d..b12695279b 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionTest.java +++ b/repository/src/test/java/org/alfresco/repo/rendition2/AbstractRenditionTest.java @@ -184,6 +184,57 @@ public abstract class AbstractRenditionTest extends AbstractRenditionIntegration } } + private void assertMetadataExtractsOkayFromSourceExtension(List sourceExtensions, List excludeList, List expectedToFail, + int expectedExtractCount, int expectedFailedCount) throws Exception + { + int extractCount = 0; + int failedCount = 0; + int successCount = 0; + int excludedCount = 0; + RenditionDefinitionRegistry2 renditionDefinitionRegistry2 = renditionService2.getRenditionDefinitionRegistry2(); + StringJoiner failures = new StringJoiner("\n"); + StringJoiner successes = new StringJoiner("\n"); + + for (String sourceExtension : sourceExtensions) + { + String sourceMimetype = mimetypeMap.getMimetype(sourceExtension); + String testFileName = getTestFileName(sourceMimetype); + if (testFileName != null) + { + extractCount++; + if 
(excludeList.contains(sourceExtension)) + { + excludedCount++; + } + else + { + try + { + checkExtract(testFileName, !expectedToFail.contains(sourceExtension)); + successes.add(sourceExtension); + successCount++; + } + catch (AssertionFailedError e) + { + failures.add(sourceExtension); + failedCount++; + } + } + } + } + + int expectedSuccessCount = expectedExtractCount - excludedCount - expectedFailedCount; + System.out.println("FAILURES:\n"+failures+"\n"); + System.out.println("SUCCESSES:\n"+successes+"\n"); + System.out.println("extractCount: "+extractCount+" expected "+expectedExtractCount); + System.out.println(" failedCount: "+failedCount+" expected "+expectedFailedCount); + System.out.println("successCount: "+successCount+" expected "+expectedSuccessCount); + + assertEquals("Extract count has changed", expectedExtractCount, extractCount); + assertEquals("Failed extract count has changed", expectedFailedCount, failedCount); + assertEquals("Successful extract count has changed", expectedSuccessCount, successCount); + } + @Test public void testExpectedNumberOfRenditions() throws Exception { @@ -242,6 +293,18 @@ public abstract class AbstractRenditionTest extends AbstractRenditionIntegration Collections.emptyList(), Collections.emptyList(), expectedRenditionCount, expectedFailedCount); } + @Test + public void testSelectedMetadataExtracts() throws Exception + { + internalTestSelectedMetadataExtracts(7, 0); + } + + protected void internalTestSelectedMetadataExtracts(int expectedExtractCount, int expectedFailedCount) throws Exception + { + assertMetadataExtractsOkayFromSourceExtension(Arrays.asList("msg", "doc", "odt", "pdf", "docx", "mp4", "png"), + Collections.emptyList(), Collections.emptyList(), expectedExtractCount, expectedFailedCount); + } + /** * Gets transforms combinations that are possible regardless of renditions. 
*/ diff --git a/repository/src/test/java/org/alfresco/repo/rendition2/LegacyRenditionTest.java b/repository/src/test/java/org/alfresco/repo/rendition2/LegacyRenditionTest.java index 734a2dd255..adaa4ce13d 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition2/LegacyRenditionTest.java +++ b/repository/src/test/java/org/alfresco/repo/rendition2/LegacyRenditionTest.java @@ -27,6 +27,7 @@ package org.alfresco.repo.rendition2; import org.alfresco.util.testing.category.DebugTests; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -47,9 +48,23 @@ public class LegacyRenditionTest extends AbstractRenditionTest legacy(); } + @Override + @Before + public void setUp() throws Exception + { + super.setUp(); + } + @AfterClass public static void after() { AbstractRenditionIntegrationTest.after(); } + + @Test + @Override + public void testSelectedMetadataExtracts() throws Exception + { + internalTestSelectedMetadataExtracts(7, 7); + } } diff --git a/repository/src/test/java/org/alfresco/repo/rendition2/NoneRenditionTest.java b/repository/src/test/java/org/alfresco/repo/rendition2/NoneRenditionTest.java index e3605284da..f845adb1d0 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition2/NoneRenditionTest.java +++ b/repository/src/test/java/org/alfresco/repo/rendition2/NoneRenditionTest.java @@ -76,6 +76,13 @@ public class NoneRenditionTest extends AbstractRenditionTest internalTestGifRenditions(0, 0); } + @Test + @Override + public void testSelectedMetadataExtracts() throws Exception + { + internalTestSelectedMetadataExtracts(7, 7); + } + @Test public void testAllTransformServiceConfigRenditions() throws Exception {