diff --git a/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java index 7a0e1682e7..133bc3b7a8 100644 --- a/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java +++ b/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java @@ -23,41 +23,44 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.repo.content.metadata; - +package org.alfresco.repo.content.metadata; + import java.io.File; import java.io.FileNotFoundException; -import java.io.Serializable; -import java.util.Calendar; +import java.io.Serializable; +import java.util.Calendar; import java.util.HashMap; -import java.util.Map; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.namespace.QName; -import org.apache.pdfbox.util.DateConverter; - -/** - * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter - * - * @author Jesper Steen Møller - */ -public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest -{ +import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; +import org.alfresco.service.namespace.QName; +import org.apache.pdfbox.util.DateConverter; + +/** + * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter + * + * @author Jesper Steen Møller + */ +public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest +{ private PdfBoxMetadataExtracter extracter; private static final int MAX_CONCURENT_EXTRACTIONS = 5; - private static final double MAX_DOC_SIZE_MB = 0.03; - - @Override - public void setUp() throws Exception - { - super.setUp(); - extracter = new PdfBoxMetadataExtracter(); + private static final double MAX_DOC_SIZE_MB = 0.03; + + @Override + public void setUp() throws Exception + { + super.setUp(); + extracter = new PdfBoxMetadataExtracter(); extracter.setDictionaryService(dictionaryService); MetadataExtracterLimits pdfLimit = new MetadataExtracterLimits(); @@ -66,96 +69,113 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest Map limits = new HashMap<>(); limits.put(MimetypeMap.MIMETYPE_PDF,pdfLimit); - extracter.setMimetypeLimits(limits); - extracter.register(); - } - - /** - * @return Returns the same transformer regardless - it is allowed - */ - protected MetadataExtracter getExtracter() - { - return extracter; - } - - public void testSupports() throws Exception - { - for (String mimetype : PdfBoxMetadataExtracter.SUPPORTED_MIMETYPES) - { - boolean supports = extracter.isSupported(mimetype); - assertTrue("Mimetype should be supported: " + mimetype, supports); - } - } - - public void testPdfExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF); - } - - /** - * This test method extracts metadata from an Adobe Illustrator file (which in recent versions is a pdf file). - * @since 3.5.0 - */ - public void testAiExtraction() throws Exception - { - testExtractFromMimetype(MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR); - } - - /** - * We can also return a created date - */ - protected void testFileSpecificMetadata(String mimetype, - Map properties) { - assertEquals( - "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, - "2005-05-26T19:52:58.000Z", - DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); - } - - /** - * Test that will show when the workaround is in place. - */ - public void testDateConversion() throws Exception { - Calendar c = DateConverter.toCalendar("D:20050526205258+01'00'"); - assertEquals(2005, c.get(Calendar.YEAR)); - assertEquals(05-1, c.get(Calendar.MONTH)); - assertEquals(26, c.get(Calendar.DAY_OF_MONTH)); - assertEquals(20, c.get(Calendar.HOUR_OF_DAY)); - assertEquals(52, c.get(Calendar.MINUTE)); - assertEquals(58, c.get(Calendar.SECOND)); - //assertEquals(0, c.get(Calendar.MILLISECOND)); + extracter.setMimetypeLimits(limits); + extracter.register(); + } + + /** + * @return Returns the same transformer regardless - it is allowed + */ + protected MetadataExtracter getExtracter() + { + return extracter; + } + + public void testSupports() throws Exception + { + for (String mimetype : PdfBoxMetadataExtracter.SUPPORTED_MIMETYPES) + { + boolean supports = extracter.isSupported(mimetype); + assertTrue("Mimetype should be supported: " + mimetype, supports); + } + } + + public void testPdfExtraction() throws Exception + { + testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF); + } + + /** + * This test method extracts metadata from an Adobe Illustrator file (which in recent versions is a pdf file). + * @since 3.5.0 + */ + public void testAiExtraction() throws Exception + { + testExtractFromMimetype(MimetypeMap.MIMETYPE_APPLICATION_ILLUSTRATOR); + } + + /** + * We can also return a created date + */ + protected void testFileSpecificMetadata(String mimetype, + Map properties) { + assertEquals( + "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, + "2005-05-26T19:52:58.000Z", + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); + } + + /** + * Test that will show when the workaround is in place. + */ + public void testDateConversion() throws Exception { + Calendar c = DateConverter.toCalendar("D:20050526205258+01'00'"); + assertEquals(2005, c.get(Calendar.YEAR)); + assertEquals(05-1, c.get(Calendar.MONTH)); + assertEquals(26, c.get(Calendar.DAY_OF_MONTH)); + assertEquals(20, c.get(Calendar.HOUR_OF_DAY)); + assertEquals(52, c.get(Calendar.MINUTE)); + assertEquals(58, c.get(Calendar.SECOND)); + //assertEquals(0, c.get(Calendar.MILLISECOND)); } public void testConcurrentExtractions() throws InterruptedException { - int threadNum = 10; - final CountDownLatch extractionsCountDown = new CountDownLatch(threadNum); + int threadNum = 11; + Map threadResults = new ConcurrentHashMap<>(); for (int i = 0; i < threadNum; i++) { - Thread t = new Thread(new Runnable() + new Thread(new Runnable() { @Override public void run() { try { - Map properties = extractFromMimetype(MimetypeMap.MIMETYPE_PDF); - if (!properties.isEmpty()) - { - extractionsCountDown.countDown(); - } + threadResults.put(Thread.currentThread().getName(), + !extractFromMimetype(MimetypeMap.MIMETYPE_PDF).isEmpty()); } catch (Exception e) { e.printStackTrace(); } } - }); - t.start(); + + }).start(); } - extractionsCountDown.await(1000, TimeUnit.MILLISECONDS); - long rejectedExtractions = extractionsCountDown.getCount(); - assertTrue("Wrong number of rejected extractions", rejectedExtractions == (threadNum - MAX_CONCURENT_EXTRACTIONS)); + int numWaits = 100; + while (numWaits > 0) + { + Thread.sleep(50); + if (threadResults.size() == threadNum) + { + break; + } + numWaits--; + } + Map counted = new HashMap<>(); + counted.put(Boolean.FALSE, 0); + counted.put(Boolean.TRUE, 0); + for (Boolean result : threadResults.values()) + { + counted.put(result, counted.get(result)+1); + } + assertEquals("Wrong number of failed extractions.", + new Integer(threadNum - MAX_CONCURENT_EXTRACTIONS), + counted.get(Boolean.FALSE)); + assertEquals("Wrong number of successful extractions.", + new Integer(MAX_CONCURENT_EXTRACTIONS), + counted.get(Boolean.TRUE)); } public void testMaxDocumentSizeLimit() throws Exception @@ -168,5 +188,5 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest } Map properties = extractFromFile(sourceFile, MimetypeMap.MIMETYPE_PDF); assertTrue(properties.isEmpty()); - } -} + } +}