diff --git a/source/test-java/org/alfresco/repo/content/ContentMinimalContextTestSuite.java b/source/test-java/org/alfresco/repo/content/ContentMinimalContextTestSuite.java index 367bfa73a0..3ce7c2276c 100644 --- a/source/test-java/org/alfresco/repo/content/ContentMinimalContextTestSuite.java +++ b/source/test-java/org/alfresco/repo/content/ContentMinimalContextTestSuite.java @@ -1,46 +1,35 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. + * #L% + */ package org.alfresco.repo.content; import junit.framework.JUnit4TestAdapter; import junit.framework.Test; import junit.framework.TestSuite; -import org.alfresco.repo.content.metadata.DWGMetadataExtracterTest; -import org.alfresco.repo.content.metadata.HtmlMetadataExtracterTest; -import org.alfresco.repo.content.metadata.MP3MetadataExtracterTest; -import org.alfresco.repo.content.metadata.MailMetadataExtracterTest; -import org.alfresco.repo.content.metadata.MetadataExtracterLimitsTest; -import org.alfresco.repo.content.metadata.OfficeMetadataExtracterTest; -import org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracterTest; -import org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracterTest; -import org.alfresco.repo.content.metadata.PdfBoxMetadataExtracterTest; -import org.alfresco.repo.content.metadata.PoiMetadataExtracterTest; -import org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest; -import org.alfresco.repo.content.metadata.TikaAutoMetadataExtracterTest; +import org.alfresco.repo.content.metadata.*; import org.alfresco.repo.content.transform.AbstractContentTransformerLimitsTest; import org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest; import 
org.alfresco.repo.content.transform.BinaryPassThroughContentTransformerTest;
@@ -121,6 +110,7 @@ public class ContentMinimalContextTestSuite extends TestSuite
         suite.addTestSuite( OpenDocumentMetadataExtracterTest.class );
         suite.addTestSuite( OpenOfficeMetadataExtracterTest.class );
         suite.addTestSuite( PdfBoxMetadataExtracterTest.class );
+        suite.addTestSuite( ConcurrencyPdfBoxMetadataExtracterTest.class );
         suite.addTestSuite( PoiMetadataExtracterTest.class );
         suite.addTestSuite( RFC822MetadataExtracterTest.class );
         suite.addTestSuite( TikaAutoMetadataExtracterTest.class );
diff --git a/source/test-java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java b/source/test-java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java
new file mode 100644
index 0000000000..775dd02665
--- /dev/null
+++ b/source/test-java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java
@@ -0,0 +1,188 @@
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2016 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail. Otherwise, the software is
+ * provided under the following open source license terms:
+ *
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Tests the limit on concurrent extractions applied by
+ * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}.
+ * <p>
+ * More threads than the configured limit extract metadata from a PDF while the extracter is
+ * artificially slowed down; exactly {@link #MAX_CONCURRENT_EXTRACTIONS} of them are expected to
+ * return metadata and the remaining ones an empty result.
+ *
+ * @author amukha
+ */
+public class ConcurrencyPdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    /** Concurrent extraction count configured on the PDF limits under test. */
+    private static final int MAX_CONCURRENT_EXTRACTIONS = 5;
+    /** Maximum document size, in MB, configured on the same limits. */
+    private static final double MAX_DOC_SIZE_MB = 0.03;
+    /** Number of threads that attempt an extraction in the test. */
+    private static final int NUMBER_OF_CONCURRENT_THREADS = 11;
+    /** Upper bound, in milliseconds, on the wait for all extraction threads to finish. */
+    private static final long MAX_WAIT_MS = NUMBER_OF_CONCURRENT_THREADS * 10 * 50L;
+
+    private SlowPdfBoxMetadataExtracter extracter;
+
+    /**
+     * Creates and registers a slowed-down PDF extracter configured with the limits under test.
+     */
+    @Override
+    public void setUp() throws Exception
+    {
+        super.setUp();
+
+        MetadataExtracterLimits pdfLimit = new MetadataExtracterLimits();
+        pdfLimit.setMaxConcurrentExtractionsCount(MAX_CONCURRENT_EXTRACTIONS);
+        pdfLimit.setMaxDocumentSizeMB(MAX_DOC_SIZE_MB);
+        Map<String, MetadataExtracterLimits> limits = new HashMap<>();
+        limits.put(MimetypeMap.MIMETYPE_PDF, pdfLimit);
+
+        extracter = new SlowPdfBoxMetadataExtracter();
+        extracter.setDictionaryService(dictionaryService);
+        extracter.setMimetypeLimits(limits);
+        // Slow every raw extraction down so that the extractions started by the test threads overlap.
+        extracter.setDelay(30 * NUMBER_OF_CONCURRENT_THREADS);
+        extracter.register();
+    }
+
+    /**
+     * @return the slowed-down extracter created in {@link #setUp()}
+     */
+    protected MetadataExtracter getExtracter()
+    {
+        return extracter;
+    }
+
+    /**
+     * No file-specific metadata is checked by this test.
+     */
+    protected void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
+    {
+        // not required
+    }
+
+    /**
+     * Starts {@link #NUMBER_OF_CONCURRENT_THREADS} extraction threads, waits for all of them to
+     * finish and checks how many returned metadata and how many returned an empty result.
+     *
+     * @throws InterruptedException if the test thread is interrupted while waiting for the workers
+     */
+    public void testConcurrentExtractions() throws InterruptedException
+    {
+        final Map<String, Boolean> threadResults = new ConcurrentHashMap<>();
+        final Map<String, Exception> threadFailures = new ConcurrentHashMap<>();
+        final CountDownLatch done = new CountDownLatch(NUMBER_OF_CONCURRENT_THREADS);
+        for (int i = 0; i < NUMBER_OF_CONCURRENT_THREADS; i++)
+        {
+            new Thread(new Runnable()
+            {
+                @Override
+                public void run()
+                {
+                    String threadName = Thread.currentThread().getName();
+                    System.out.println(threadName + " started " + System.currentTimeMillis());
+                    try
+                    {
+                        Map<QName, Serializable> results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF);
+                        System.out.println(threadName + " results are " + results);
+                        threadResults.put(threadName, !results.isEmpty());
+                    }
+                    catch (Exception e)
+                    {
+                        e.printStackTrace();
+                        threadFailures.put(threadName, e);
+                    }
+                    finally
+                    {
+                        System.out.println(threadName + " finished " + System.currentTimeMillis());
+                        done.countDown();
+                    }
+                }
+            }).start();
+        }
+
+        assertTrue("Extraction threads did not finish within " + MAX_WAIT_MS + " ms.",
+                done.await(MAX_WAIT_MS, TimeUnit.MILLISECONDS));
+        assertTrue("Extraction threads failed: " + threadFailures, threadFailures.isEmpty());
+
+        int succeeded = 0;
+        int rejected = 0;
+        for (Boolean extracted : threadResults.values())
+        {
+            if (extracted)
+            {
+                succeeded++;
+            }
+            else
+            {
+                rejected++;
+            }
+        }
+        assertEquals("Wrong number of failed extractions.",
+                NUMBER_OF_CONCURRENT_THREADS - MAX_CONCURRENT_EXTRACTIONS, rejected);
+        assertEquals("Wrong number of successful extractions.",
+                MAX_CONCURRENT_EXTRACTIONS, succeeded);
+    }
+
+    /**
+     * A {@link PdfBoxMetadataExtracter} that sleeps for a configurable time before each raw extraction.
+     * NOTE(review): its property mapping is presumably loaded from a resource named after this class - confirm before renaming.
+     */
+    private static class SlowPdfBoxMetadataExtracter extends PdfBoxMetadataExtracter
+    {
+        private long delay = 0;
+
+        /**
+         * @param delay time in milliseconds to sleep before each raw extraction
+         */
+        public void setDelay(long delay)
+        {
+            this.delay = delay;
+        }
+
+        @Override
+        protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
+        {
+            Thread.sleep(delay);
+            Map<String, Serializable> results = super.extractRaw(reader);
+            System.out.println(Thread.currentThread().getName() + " results are " + results);
+            return results;
+        }
+    }
+}
diff --git a/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
index dd7d405b91..1679217862 100644
--- a/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ 
b/source/test-java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java @@ -31,7 +31,6 @@ import java.io.Serializable; import java.util.Calendar; import java.util.HashMap; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; @@ -124,60 +123,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest assertEquals(58, c.get(Calendar.SECOND)); //assertEquals(0, c.get(Calendar.MILLISECOND)); } - - public void testConcurrentExtractions() throws InterruptedException - { - final int threadNum = 11; - final Map threadResults = new ConcurrentHashMap<>(); - for (int i = 0; i < threadNum; i++) - { - new Thread(new Runnable() - { - @Override - public void run() - { - try - { - Map results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF); - if(!results.isEmpty()) - { - // delay successful transformations to help all threads to start in time - Thread.sleep(5*threadNum); - } - threadResults.put(Thread.currentThread().getName(), !results.isEmpty()); - } - catch (Exception e) - { - e.printStackTrace(); - } - } - - }).start(); - } - int numWaits = 100; - while (numWaits > 0) - { - Thread.sleep(50); - if (threadResults.size() == threadNum) - { - break; - } - numWaits--; - } - Map counted = new HashMap<>(); - counted.put(Boolean.FALSE, 0); - counted.put(Boolean.TRUE, 0); - for (Boolean result : threadResults.values()) - { - counted.put(result, counted.get(result)+1); - } - assertEquals("Wrong number of failed extractions.", - new Integer(threadNum - MAX_CONCURENT_EXTRACTIONS), - counted.get(Boolean.FALSE)); - assertEquals("Wrong number of successful extractions.", - new Integer(MAX_CONCURENT_EXTRACTIONS), - counted.get(Boolean.TRUE)); - } public void testMaxDocumentSizeLimit() throws Exception { diff --git a/source/test-resources/alfresco/metadata/ConcurrencyPdfBoxMetadataExtracterTest-SlowPdfBoxMetadataExtracter.properties 
b/source/test-resources/alfresco/metadata/ConcurrencyPdfBoxMetadataExtracterTest-SlowPdfBoxMetadataExtracter.properties new file mode 100644 index 0000000000..156b5d63a7 --- /dev/null +++ b/source/test-resources/alfresco/metadata/ConcurrencyPdfBoxMetadataExtracterTest-SlowPdfBoxMetadataExtracter.properties @@ -0,0 +1,11 @@ +# +# Copied from PdfBoxMetadataExtracter +# +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 + +# Mappings +author=cm:author +title=cm:title +subject=cm:description +created=cm:created \ No newline at end of file