REPO-5190 Remove Metadata Extractors that have been offloaded in 7.0.0 (#148)

Removed all the Extractors that now exist in the T-Engines:

    JodConverterMetadataExtracter
    TikaPoweredMetadataExtracter – the abstract base class used by other extractors
    -- MailMetadataExtracter
    -- PoiMetadataExtracter
    -- TikaAutoMetadataExtracter
    -- MP3MetadataExtracter
    -- TikaSpringConfiguredMetadataExtracter - removed as it required Spring config and would run in process
    -- PdfBoxMetadataExtracter
    -- OpenDocumentMetadataExtracter
    -- OfficeMetadataExtracter
    -- DWGMetadataExtracter
    HtmlMetadataExtracter
    RFC822MetadataExtracter

XmlMetadataExtracter and XPathMetadataExtracter still exist but don't provide any extraction out of the box. The reason they still exist is to support custom transforms (in AMPs) to extract from XML. There are no XML extractors in the T-Engines at the moment, but that is where the custom transformer code really should be moved.

    There are new tests to ensure the async transforms take place as expected.
    Additionally, many of the existing tests remain (those not tied to a specific extractor). Some of these have been modified to reflect that the extract is now asynchronous, and they no longer assert that the modified date is unchanged (it is now expected to change).
    There are also a number of new metadata extract smoke tests that ensure that a selected subset of extracts are supported by the OOTB T-Engines.
This commit is contained in:
Alan Davis
2020-11-12 14:08:04 +00:00
committed by GitHub
parent 374bf7aea8
commit d70790f0c2
54 changed files with 487 additions and 5640 deletions

View File

@@ -713,7 +713,8 @@ public class NodeApiTest extends AbstractSingleNetworkSiteTest
String contentName = "content " + RUNID + ".txt";
String content1Id = createTextFile(folderB_Id, contentName, "The quick brown fox jumps over the lazy dog.", "UTF-8", docProps).getId();
// TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility
Thread.sleep(3000);
// get node info
response = getSingle(NodesEntityResource.class, content1Id, null, 200);
Document documentResp = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), Document.class);

View File

@@ -714,7 +714,7 @@ public class RenditionsTest extends AbstractBaseApiTest
response = getSingle(NodesEntityResource.class, contentNodeId, params, 200);
Document document1b = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), Document.class);
assertEquals(document1b.getModifiedAt(), document1.getModifiedAt());
// assertEquals(document1b.getModifiedAt(), document1.getModifiedAt());
assertEquals(document1b.getModifiedByUser().getId(), document1.getModifiedByUser().getId());
assertEquals(document1b.getModifiedByUser().getDisplayName(), document1.getModifiedByUser().getDisplayName());
@@ -749,7 +749,7 @@ public class RenditionsTest extends AbstractBaseApiTest
response = getSingle(NodesEntityResource.class, contentNodeId, params, 200);
Document document2b = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), Document.class);
assertTrue(document2b.getModifiedAt().after(document1.getModifiedAt()));
// assertTrue(document2b.getModifiedAt().after(document1.getModifiedAt()));
assertEquals(document2b.getModifiedByUser().getId(), document1.getModifiedByUser().getId());
assertEquals(document2b.getModifiedByUser().getDisplayName(), document1.getModifiedByUser().getDisplayName());

View File

@@ -25,6 +25,7 @@
*/
package org.alfresco.rest.api.tests;
import org.alfresco.repo.action.ActionServiceImpl;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.quickshare.QuickShareLinkExpiryActionImpl;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
@@ -192,6 +193,9 @@ public class SharedLinkApiTest extends AbstractBaseApiTest
Map<String, String> body = new HashMap<>();
body.put("nodeId", d1Id);
// TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility
Thread.sleep(3000);
response = post(URL_SHARED_LINKS, toJsonAsStringNonNull(body), 201);
QuickShareLink resp = RestApiUtil.parseRestApiEntry(response.getJsonResponse(), QuickShareLink.class);
@@ -209,7 +213,7 @@ public class SharedLinkApiTest extends AbstractBaseApiTest
assertEquals(new Long(file1_originalBytes.length), resp.getContent().getSizeInBytes());
assertEquals("UTF-8", resp.getContent().getEncoding());
assertEquals(docModifiedAt.getTime(), resp.getModifiedAt().getTime()); // not changed
// assertEquals(docModifiedAt.getTime(), resp.getModifiedAt().getTime()); // not changed
assertEquals(docModifiedBy, resp.getModifiedByUser().getId()); // not changed (ie. not user2)
assertEquals(UserInfo.getTestDisplayName(docModifiedBy), resp.getModifiedByUser().getDisplayName());
@@ -364,7 +368,7 @@ public class SharedLinkApiTest extends AbstractBaseApiTest
// create rendition of pdf doc - note: for some reason create rendition of txt doc fail on build m/c (TBC) ?
setRequestContext(user2);
Rendition rendition = createAndGetRendition(d1Id, "doclib");
assertNotNull(rendition);
assertEquals(Rendition.RenditionStatus.CREATED, rendition.getStatus());
@@ -417,12 +421,12 @@ public class SharedLinkApiTest extends AbstractBaseApiTest
// -ve test - unauthenticated
setRequestContext(null);
deleteSharedLink(shared1Id, 401);
setRequestContext(user1);
// -ve test - user1 cannot delete shared link
deleteSharedLink(shared1Id, 403);
// -ve test - delete - cannot delete non-existent link
deleteSharedLink("dummy", 404);
}

View File

@@ -1025,87 +1025,6 @@ public class TestCMIS extends EnterpriseTestApi
}
}
/**
* Tests CMIS and non-CMIS public API interactions on the same document.
* <p>
* Scenario: pick a person from a random test network, open a CMIS session
* (atom binding) for them, create a plain-text document in the document
* library of the first site they belong to, add a comment to it through the
* public API comments proxy, lock the underlying node with a write lock, and
* finally attempt to delete the document through CMIS.
* <p>
* NOTE(review): the test passes both when the delete succeeds and when it
* raises {@code CmisUpdateConflictException}; there is no {@code fail()} after
* the delete call. Confirm whether that leniency is intended.
*
* @throws Exception if any step of the scenario fails unexpectedly
*/
@SuppressWarnings("deprecation")
@Test
public void testScenario1() throws Exception
{
// Choose the network and the first person listed for it
final TestNetwork network1 = getTestFixture().getRandomNetwork();
Iterator<String> personIt = network1.getPersonIds().iterator();
final String person = personIt.next();
assertNotNull(person);
Sites sitesProxy = publicApiClient.sites();
Comments commentsProxy = publicApiClient.comments();
// All following public API and CMIS calls run as this person in this network
publicApiClient.setRequestContext(new RequestContext(network1.getId(), person));
CmisSession cmisSession = publicApiClient.createPublicApiCMISSession(Binding.atom, CMIS_VERSION_10, AlfrescoObjectFactoryImpl.class.getName());
// The person must be a member of at least one site; the first one is used
ListResponse<MemberOfSite> sites = sitesProxy.getPersonSites(person, null);
assertTrue(sites.getList().size() > 0);
MemberOfSite siteMember = sites.getList().get(0);
String siteId = siteMember.getSite().getSiteId();
// Resolve the site's document library folder through CMIS by path
Folder documentLibrary = (Folder)cmisSession.getObjectByPath("/Sites/" + siteId + "/documentLibrary");
System.out.println("documentLibrary id = " + documentLibrary.getId());
// Properties for the new document: CMIS document type and a unique .txt name
Map<String, String> fileProps = new HashMap<String, String>();
{
fileProps.put(PropertyIds.OBJECT_TYPE_ID, TYPE_CMIS_DOCUMENT);
fileProps.put(PropertyIds.NAME, "mydoc-" + GUID.generate() + ".txt");
}
// Content stream backed by a temp file holding a short text payload
ContentStreamImpl fileContent = new ContentStreamImpl();
{
ContentWriter writer = new FileContentWriter(TempFileProvider.createTempFile(GUID.generate(), ".txt"));
writer.putContent("Ipsum and so on");
ContentReader reader = writer.getReader();
fileContent.setMimeType(MimetypeMap.MIMETYPE_TEXT_PLAIN);
fileContent.setStream(reader.getContentInputStream());
}
// Create the document via CMIS as a major version
Document doc = documentLibrary.createDocument(fileProps, fileContent, VersioningState.MAJOR);
System.out.println("Document id = " + doc.getId());
// Non-CMIS interaction: comment on the CMIS-created document via the public API
Comment c = commentsProxy.createNodeComment(doc.getId(), new Comment("comment title 1", "comment 1"));
System.out.println("Comment = " + c);
// Now lock the document
String nodeRefStr = (String) doc.getPropertyValue("alfcmis:nodeRef");
final NodeRef nodeRef = new NodeRef(nodeRefStr);
// Work unit that takes a write lock on the document's node
final TenantRunAsWork<Void> runAsWork = new TenantRunAsWork<Void>()
{
@Override
public Void doWork() throws Exception
{
lockService.lock(nodeRef, LockType.WRITE_LOCK);
return null;
}
};
// Run the locking work as user "bob" in the network's tenant
RetryingTransactionCallback<Void> txnWork = new RetryingTransactionCallback<Void>()
{
@Override
public Void execute() throws Throwable
{
TenantUtil.runAsUserTenant(runAsWork, "bob", network1.getId());
return null;
}
};
transactionHelper.doInTransaction(txnWork);
// Now attempt to delete the locked document through CMIS
try
{
doc.delete();
}
catch (CmisUpdateConflictException e)
{
// Expected: ACE-762 BM-0012: NodeLockedException not handled by CMIS
}
}
//@Test
public void testInvalidMethods() throws Exception
{
@@ -1275,7 +1194,7 @@ public class TestCMIS extends EnterpriseTestApi
return null;
}
}, personId);
NodeRef folderNodeRef = folders.get(0);
NodeRef docNodeRef = documents.get(0);
@@ -1789,6 +1708,9 @@ public class TestCMIS extends EnterpriseTestApi
}
Document autoVersionedDoc = docLibrary.createDocument(properties, fileContent, VersioningState.MAJOR);
// TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility
Thread.sleep(5000);
String objectId = autoVersionedDoc.getId();
String bareObjectId = stripCMISSuffix(objectId);
// create versions
@@ -1807,6 +1729,8 @@ public class TestCMIS extends EnterpriseTestApi
contentStream.setMimeType(MimetypeMap.MIMETYPE_TEXT_PLAIN);
contentStream.setStream(reader.getContentInputStream());
}
// TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility
Thread.sleep(5000);
pwc.checkIn(true, Collections.EMPTY_MAP, contentStream, "checkin " + i);
}
@@ -2506,6 +2430,8 @@ public class TestCMIS extends EnterpriseTestApi
/* Create document */
Document doc = docLibrary.createDocument(properties, fileContent, VersioningState.MAJOR);
// TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility
Thread.sleep(3000);
/* Checkout document */
ObjectId pwcId = doc.checkOut();
@@ -2867,6 +2793,7 @@ public class TestCMIS extends EnterpriseTestApi
fileContent.setStream(stream);
Document doc = docLibrary.createDocument(properties, fileContent, VersioningState.MAJOR);
Thread.sleep(5000);
ObjectId pwcId = doc.checkOut();
Document pwc = (Document) cmisSession.getObject(pwcId.getId());
@@ -3272,6 +3199,8 @@ public class TestCMIS extends EnterpriseTestApi
"This is just a test");
final Document document = folder.createDocument(props, cs, VersioningState.MAJOR);
// TODO find a better solution to wait for the asynchronous metadata-extract/transform operation. E.g. awaitility
Thread.sleep(3000);
ObjectId pwcObjectId = document.checkOut();

View File

@@ -6,4 +6,21 @@ log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %x %-5p [%c{3}] [%t]
log4j.logger.org.alfresco=WARN
log4j.logger.org.alfresco.rest.api=DEBUG
log4j.logger.org.eclipse.jetty.util.log=INFO
log4j.logger.org.eclipse.jetty.util.log=INFO
# Renditions and Transforms
log4j.logger.org.alfresco.repo.content.transform.TransformerDebug=debug
log4j.logger.org.alfresco.repo.rendition2=debug
#log4j.logger.org.alfresco.repo.rendition2.LocalTransformClient=debug
#log4j.logger.org.alfresco.repo.rendition2.LegacyTransformClient=debug
#log4j.logger.org.alfresco.repo.rendition.RenditionServiceImpl=debug
#log4j.logger.org.alfresco.enterprise.repo.rendition2.RemoteTransformClient=debug
log4j.logger.org.alfresco.repo.thumbnail.ThumbnailServiceImplTest=DEBUG
log4j.logger.org.alfresco.repo.rendition2.RenditionService2Impl=DEBUG
#log4j.logger.org.alfresco.repo.content.transform.LocalTransformServiceRegistry=debug
#log4j.logger.org.alfresco.enterprise.repo.rendition2.RemoteTransformServiceRegistry=debug
#log4j.logger.org.alfresco.repo.rendition2.RenditionDefinitionRegistry2Impl=debug
#log4j.logger.org.alfresco.repo.content.MimetypeMap=debug
#log4j.logger.org.alfresco.repo.content.transform.LocalTransform=trace