mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-10-08 14:51:49 +00:00
REPO-1525: PdfBoxMetadataExtracterTest failures on all DBs (including main PostgreSQL build)
- Moved the concurrent test to a separate class - ConcurrencyPdfBoxMetadataExtracterTest - It is now utilizing an overridden extractor with a configurable timeout. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@132690 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -29,18 +29,7 @@ import junit.framework.JUnit4TestAdapter;
|
|||||||
import junit.framework.Test;
|
import junit.framework.Test;
|
||||||
import junit.framework.TestSuite;
|
import junit.framework.TestSuite;
|
||||||
|
|
||||||
import org.alfresco.repo.content.metadata.DWGMetadataExtracterTest;
|
import org.alfresco.repo.content.metadata.*;
|
||||||
import org.alfresco.repo.content.metadata.HtmlMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.MP3MetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.MailMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.MetadataExtracterLimitsTest;
|
|
||||||
import org.alfresco.repo.content.metadata.OfficeMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.PdfBoxMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.PoiMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.metadata.TikaAutoMetadataExtracterTest;
|
|
||||||
import org.alfresco.repo.content.transform.AbstractContentTransformerLimitsTest;
|
import org.alfresco.repo.content.transform.AbstractContentTransformerLimitsTest;
|
||||||
import org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest;
|
import org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest;
|
||||||
import org.alfresco.repo.content.transform.BinaryPassThroughContentTransformerTest;
|
import org.alfresco.repo.content.transform.BinaryPassThroughContentTransformerTest;
|
||||||
@@ -121,6 +110,7 @@ public class ContentMinimalContextTestSuite extends TestSuite
|
|||||||
suite.addTestSuite( OpenDocumentMetadataExtracterTest.class );
|
suite.addTestSuite( OpenDocumentMetadataExtracterTest.class );
|
||||||
suite.addTestSuite( OpenOfficeMetadataExtracterTest.class );
|
suite.addTestSuite( OpenOfficeMetadataExtracterTest.class );
|
||||||
suite.addTestSuite( PdfBoxMetadataExtracterTest.class );
|
suite.addTestSuite( PdfBoxMetadataExtracterTest.class );
|
||||||
|
suite.addTestSuite( ConcurrencyPdfBoxMetadataExtracterTest.class );
|
||||||
suite.addTestSuite( PoiMetadataExtracterTest.class );
|
suite.addTestSuite( PoiMetadataExtracterTest.class );
|
||||||
suite.addTestSuite( RFC822MetadataExtracterTest.class );
|
suite.addTestSuite( RFC822MetadataExtracterTest.class );
|
||||||
suite.addTestSuite( TikaAutoMetadataExtracterTest.class );
|
suite.addTestSuite( TikaAutoMetadataExtracterTest.class );
|
||||||
|
@@ -0,0 +1,153 @@
|
|||||||
|
/*
|
||||||
|
* #%L
|
||||||
|
* Alfresco Repository
|
||||||
|
* %%
|
||||||
|
* Copyright (C) 2005 - 2016 Alfresco Software Limited
|
||||||
|
* %%
|
||||||
|
* This file is part of the Alfresco software.
|
||||||
|
* If the software was purchased under a paid Alfresco license, the terms of
|
||||||
|
* the paid license agreement will prevail. Otherwise, the software is
|
||||||
|
* provided under the following open source license terms:
|
||||||
|
*
|
||||||
|
* Alfresco is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Alfresco is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
* #L%
|
||||||
|
*/
|
||||||
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
|
import org.alfresco.service.namespace.QName;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The test designed for testing the concurrent limitations in
|
||||||
|
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
|
||||||
|
*
|
||||||
|
* @author amukha
|
||||||
|
*/
|
||||||
|
public class ConcurrencyPdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||||
|
{
|
||||||
|
private SlowPdfBoxMetadataExtracter extracter;
|
||||||
|
|
||||||
|
private static final int MAX_CONCURENT_EXTRACTIONS = 5;
|
||||||
|
private static final double MAX_DOC_SIZE_MB = 0.03;
|
||||||
|
private static final int NUMBER_OF_CONCURRENT_THREADS = 11;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception
|
||||||
|
{
|
||||||
|
super.setUp();
|
||||||
|
extracter = new SlowPdfBoxMetadataExtracter();
|
||||||
|
extracter.setDictionaryService(dictionaryService);
|
||||||
|
|
||||||
|
MetadataExtracterLimits pdfLimit = new MetadataExtracterLimits();
|
||||||
|
pdfLimit.setMaxConcurrentExtractionsCount(MAX_CONCURENT_EXTRACTIONS);
|
||||||
|
pdfLimit.setMaxDocumentSizeMB(MAX_DOC_SIZE_MB);
|
||||||
|
Map<String,MetadataExtracterLimits> limits = new HashMap<>();
|
||||||
|
limits.put(MimetypeMap.MIMETYPE_PDF,pdfLimit);
|
||||||
|
|
||||||
|
extracter.setMimetypeLimits(limits);
|
||||||
|
extracter.setDelay(30*NUMBER_OF_CONCURRENT_THREADS);
|
||||||
|
extracter.register();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns the same transformer regardless - it is allowed
|
||||||
|
*/
|
||||||
|
protected MetadataExtracter getExtracter()
|
||||||
|
{
|
||||||
|
return extracter;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
|
||||||
|
{
|
||||||
|
// not required
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void testConcurrentExtractions() throws InterruptedException
|
||||||
|
{
|
||||||
|
final Map<String, Boolean> threadResults = new ConcurrentHashMap<>();
|
||||||
|
for (int i = 0; i < NUMBER_OF_CONCURRENT_THREADS; i++)
|
||||||
|
{
|
||||||
|
new Thread(new Runnable()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public void run()
|
||||||
|
{
|
||||||
|
System.out.println(Thread.currentThread().getName() + " started " + System.currentTimeMillis());
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Map<QName, Serializable> results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF);
|
||||||
|
System.out.println(Thread.currentThread().getName() + " results are " + results);
|
||||||
|
threadResults.put(Thread.currentThread().getName(), !results.isEmpty());
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
System.out.println(Thread.currentThread().getName() + " finished " + System.currentTimeMillis());
|
||||||
|
}
|
||||||
|
|
||||||
|
}).start();
|
||||||
|
}
|
||||||
|
int numWaits = NUMBER_OF_CONCURRENT_THREADS*10;
|
||||||
|
while (numWaits > 0)
|
||||||
|
{
|
||||||
|
Thread.sleep(50);
|
||||||
|
if (threadResults.size() == NUMBER_OF_CONCURRENT_THREADS)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
numWaits--;
|
||||||
|
}
|
||||||
|
Map<Boolean, Integer> counted = new HashMap<>();
|
||||||
|
counted.put(Boolean.FALSE, 0);
|
||||||
|
counted.put(Boolean.TRUE, 0);
|
||||||
|
for (Boolean result : threadResults.values())
|
||||||
|
{
|
||||||
|
counted.put(result, counted.get(result)+1);
|
||||||
|
}
|
||||||
|
assertEquals("Wrong number of failed extractions.",
|
||||||
|
new Integer(NUMBER_OF_CONCURRENT_THREADS - MAX_CONCURENT_EXTRACTIONS),
|
||||||
|
counted.get(Boolean.FALSE));
|
||||||
|
assertEquals("Wrong number of successful extractions.",
|
||||||
|
new Integer(MAX_CONCURENT_EXTRACTIONS),
|
||||||
|
counted.get(Boolean.TRUE));
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SlowPdfBoxMetadataExtracter extends PdfBoxMetadataExtracter
|
||||||
|
{
|
||||||
|
private long delay = 0;
|
||||||
|
|
||||||
|
public void setDelay(long delay)
|
||||||
|
{
|
||||||
|
this.delay = delay;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||||
|
{
|
||||||
|
Thread.sleep(delay);
|
||||||
|
Map<String, Serializable> results = super.extractRaw(reader);
|
||||||
|
System.out.println(Thread.currentThread().getName() + " results are " + results);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@@ -31,7 +31,6 @@ import java.io.Serializable;
|
|||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
|
|
||||||
import org.alfresco.model.ContentModel;
|
import org.alfresco.model.ContentModel;
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
@@ -125,60 +124,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
//assertEquals(0, c.get(Calendar.MILLISECOND));
|
//assertEquals(0, c.get(Calendar.MILLISECOND));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testConcurrentExtractions() throws InterruptedException
|
|
||||||
{
|
|
||||||
final int threadNum = 11;
|
|
||||||
final Map<String, Boolean> threadResults = new ConcurrentHashMap<>();
|
|
||||||
for (int i = 0; i < threadNum; i++)
|
|
||||||
{
|
|
||||||
new Thread(new Runnable()
|
|
||||||
{
|
|
||||||
@Override
|
|
||||||
public void run()
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
Map<QName, Serializable> results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF);
|
|
||||||
if(!results.isEmpty())
|
|
||||||
{
|
|
||||||
// delay successful transformations to help all threads to start in time
|
|
||||||
Thread.sleep(5*threadNum);
|
|
||||||
}
|
|
||||||
threadResults.put(Thread.currentThread().getName(), !results.isEmpty());
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}).start();
|
|
||||||
}
|
|
||||||
int numWaits = 100;
|
|
||||||
while (numWaits > 0)
|
|
||||||
{
|
|
||||||
Thread.sleep(50);
|
|
||||||
if (threadResults.size() == threadNum)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
numWaits--;
|
|
||||||
}
|
|
||||||
Map<Boolean, Integer> counted = new HashMap<>();
|
|
||||||
counted.put(Boolean.FALSE, 0);
|
|
||||||
counted.put(Boolean.TRUE, 0);
|
|
||||||
for (Boolean result : threadResults.values())
|
|
||||||
{
|
|
||||||
counted.put(result, counted.get(result)+1);
|
|
||||||
}
|
|
||||||
assertEquals("Wrong number of failed extractions.",
|
|
||||||
new Integer(threadNum - MAX_CONCURENT_EXTRACTIONS),
|
|
||||||
counted.get(Boolean.FALSE));
|
|
||||||
assertEquals("Wrong number of successful extractions.",
|
|
||||||
new Integer(MAX_CONCURENT_EXTRACTIONS),
|
|
||||||
counted.get(Boolean.TRUE));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testMaxDocumentSizeLimit() throws Exception
|
public void testMaxDocumentSizeLimit() throws Exception
|
||||||
{
|
{
|
||||||
File sourceFile = AbstractContentTransformerTest.loadNamedQuickTestFile("quick-size-limit.pdf");
|
File sourceFile = AbstractContentTransformerTest.loadNamedQuickTestFile("quick-size-limit.pdf");
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
#
|
||||||
|
# Copied from PdfBoxMetadataExtracter
|
||||||
|
#
|
||||||
|
# Namespaces
|
||||||
|
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||||
|
|
||||||
|
# Mappings
|
||||||
|
author=cm:author
|
||||||
|
title=cm:title
|
||||||
|
subject=cm:description
|
||||||
|
created=cm:created
|
Reference in New Issue
Block a user