mirror of
				https://github.com/Alfresco/alfresco-community-repo.git
				synced 2025-10-22 15:12:38 +00:00 
			
		
		
		
	REPO-1525: PdfBoxMetadataExtracterTest failures on all DBs (including main PostgreSQL build)
- Moved the concurrent test to a separate class, ConcurrencyPdfBoxMetadataExtracterTest.
- It now uses an overridden extracter with a configurable delay.
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@132690 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
		| @@ -1,46 +1,35 @@ | ||||
| /* | ||||
|  * #%L | ||||
|  * Alfresco Repository | ||||
|  * %% | ||||
|  * Copyright (C) 2005 - 2016 Alfresco Software Limited | ||||
|  * %% | ||||
|  * This file is part of the Alfresco software.  | ||||
|  * If the software was purchased under a paid Alfresco license, the terms of  | ||||
|  * the paid license agreement will prevail.  Otherwise, the software is  | ||||
|  * provided under the following open source license terms: | ||||
|  *  | ||||
|  * Alfresco is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU Lesser General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  *  | ||||
|  * Alfresco is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU Lesser General Public License for more details. | ||||
|  *  | ||||
|  * You should have received a copy of the GNU Lesser General Public License | ||||
|  * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * #L% | ||||
|  */ | ||||
| /* | ||||
|  * #%L | ||||
|  * Alfresco Repository | ||||
|  * %% | ||||
|  * Copyright (C) 2005 - 2016 Alfresco Software Limited | ||||
|  * %% | ||||
|  * This file is part of the Alfresco software.  | ||||
|  * If the software was purchased under a paid Alfresco license, the terms of  | ||||
|  * the paid license agreement will prevail.  Otherwise, the software is  | ||||
|  * provided under the following open source license terms: | ||||
|  *  | ||||
|  * Alfresco is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU Lesser General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  *  | ||||
|  * Alfresco is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU Lesser General Public License for more details. | ||||
|  *  | ||||
|  * You should have received a copy of the GNU Lesser General Public License | ||||
|  * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * #L% | ||||
|  */ | ||||
| package org.alfresco.repo.content; | ||||
|  | ||||
| import junit.framework.JUnit4TestAdapter; | ||||
| import junit.framework.Test; | ||||
| import junit.framework.TestSuite; | ||||
|  | ||||
| import org.alfresco.repo.content.metadata.DWGMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.HtmlMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.MP3MetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.MailMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.MetadataExtracterLimitsTest; | ||||
| import org.alfresco.repo.content.metadata.OfficeMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.PdfBoxMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.PoiMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.RFC822MetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.TikaAutoMetadataExtracterTest; | ||||
| import org.alfresco.repo.content.metadata.*; | ||||
| import org.alfresco.repo.content.transform.AbstractContentTransformerLimitsTest; | ||||
| import org.alfresco.repo.content.transform.AppleIWorksContentTransformerTest; | ||||
| import org.alfresco.repo.content.transform.BinaryPassThroughContentTransformerTest; | ||||
| @@ -121,6 +110,7 @@ public class ContentMinimalContextTestSuite extends TestSuite | ||||
|        suite.addTestSuite( OpenDocumentMetadataExtracterTest.class ); | ||||
|        suite.addTestSuite( OpenOfficeMetadataExtracterTest.class ); | ||||
|        suite.addTestSuite( PdfBoxMetadataExtracterTest.class ); | ||||
|        suite.addTestSuite( ConcurrencyPdfBoxMetadataExtracterTest.class ); | ||||
|        suite.addTestSuite( PoiMetadataExtracterTest.class ); | ||||
|        suite.addTestSuite( RFC822MetadataExtracterTest.class ); | ||||
|        suite.addTestSuite( TikaAutoMetadataExtracterTest.class ); | ||||
|   | ||||
| @@ -0,0 +1,153 @@ | ||||
| /* | ||||
|  * #%L | ||||
|  * Alfresco Repository | ||||
|  * %% | ||||
|  * Copyright (C) 2005 - 2016 Alfresco Software Limited | ||||
|  * %% | ||||
|  * This file is part of the Alfresco software. | ||||
|  * If the software was purchased under a paid Alfresco license, the terms of | ||||
|  * the paid license agreement will prevail.  Otherwise, the software is | ||||
|  * provided under the following open source license terms: | ||||
|  * | ||||
|  * Alfresco is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU Lesser General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * Alfresco is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public License | ||||
|  * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * #L% | ||||
|  */ | ||||
| package org.alfresco.repo.content.metadata; | ||||
|  | ||||
| import org.alfresco.repo.content.MimetypeMap; | ||||
| import org.alfresco.service.cmr.repository.ContentReader; | ||||
| import org.alfresco.service.namespace.QName; | ||||
|  | ||||
| import java.io.Serializable; | ||||
| import java.util.HashMap; | ||||
| import java.util.Map; | ||||
| import java.util.concurrent.ConcurrentHashMap; | ||||
|  | ||||
| /** | ||||
|  * Tests the limit on concurrent extractions applied in | ||||
|  * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}. | ||||
|  * <p> | ||||
|  * A deliberately slow PDF extracter is registered with a limit of | ||||
|  * {@code MAX_CONCURRENT_EXTRACTIONS} concurrent extractions. The test then starts | ||||
|  * {@code NUMBER_OF_CONCURRENT_THREADS} extraction threads and counts how many of them | ||||
|  * obtained metadata. | ||||
|  * | ||||
|  * @author amukha | ||||
|  */ | ||||
| public class ConcurrencyPdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest | ||||
| { | ||||
|     private static final int MAX_CONCURRENT_EXTRACTIONS = 5; | ||||
|     private static final double MAX_DOC_SIZE_MB = 0.03; | ||||
|     private static final int NUMBER_OF_CONCURRENT_THREADS = 11; | ||||
|  | ||||
|     /** | ||||
|      * Delay, in milliseconds, added before every raw extraction. It scales with the thread | ||||
|      * count, presumably so that all threads have started before the first extraction | ||||
|      * completes. | ||||
|      */ | ||||
|     private static final long EXTRACTION_DELAY_MS = 30L * NUMBER_OF_CONCURRENT_THREADS; | ||||
|  | ||||
|     /** Upper bound, in milliseconds, on the total time spent waiting for the worker threads. */ | ||||
|     private static final long MAX_WAIT_MS = 50L * 10 * NUMBER_OF_CONCURRENT_THREADS; | ||||
|  | ||||
|     private SlowPdfBoxMetadataExtracter extracter; | ||||
|  | ||||
|     /** | ||||
|      * Creates the slow extracter, applies the PDF limits and the delay, and registers it. | ||||
|      */ | ||||
|     @Override | ||||
|     public void setUp() throws Exception | ||||
|     { | ||||
|         super.setUp(); | ||||
|         extracter = new SlowPdfBoxMetadataExtracter(); | ||||
|         extracter.setDictionaryService(dictionaryService); | ||||
|  | ||||
|         MetadataExtracterLimits pdfLimit = new MetadataExtracterLimits(); | ||||
|         pdfLimit.setMaxConcurrentExtractionsCount(MAX_CONCURRENT_EXTRACTIONS); | ||||
|         pdfLimit.setMaxDocumentSizeMB(MAX_DOC_SIZE_MB); | ||||
|         Map<String, MetadataExtracterLimits> limits = new HashMap<>(); | ||||
|         limits.put(MimetypeMap.MIMETYPE_PDF, pdfLimit); | ||||
|  | ||||
|         extracter.setMimetypeLimits(limits); | ||||
|         extracter.setDelay(EXTRACTION_DELAY_MS); | ||||
|         extracter.register(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @return the slow PDF extracter created in {@link #setUp()} | ||||
|      */ | ||||
|     @Override | ||||
|     protected MetadataExtracter getExtracter() | ||||
|     { | ||||
|         return extracter; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Intentionally empty: this test only counts extractions and does not inspect | ||||
|      * file-specific properties. | ||||
|      */ | ||||
|     @Override | ||||
|     protected void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) | ||||
|     { | ||||
|         // not required | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Starts {@code NUMBER_OF_CONCURRENT_THREADS} threads that each extract from the PDF test | ||||
|      * document. Expects exactly {@code MAX_CONCURRENT_EXTRACTIONS} of them to obtain a | ||||
|      * non-empty result and all the others to obtain an empty one. | ||||
|      * | ||||
|      * @throws InterruptedException if the test thread is interrupted while waiting for the workers | ||||
|      */ | ||||
|     public void testConcurrentExtractions() throws InterruptedException | ||||
|     { | ||||
|         final Map<String, Boolean> threadResults = new ConcurrentHashMap<>(); | ||||
|         final Thread[] workers = new Thread[NUMBER_OF_CONCURRENT_THREADS]; | ||||
|         for (int i = 0; i < workers.length; i++) | ||||
|         { | ||||
|             workers[i] = new Thread(new Runnable() | ||||
|             { | ||||
|                 @Override | ||||
|                 public void run() | ||||
|                 { | ||||
|                     final String threadName = Thread.currentThread().getName(); | ||||
|                     System.out.println(threadName + " started " + System.currentTimeMillis()); | ||||
|                     try | ||||
|                     { | ||||
|                         Map<QName, Serializable> results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF); | ||||
|                         System.out.println(threadName + " results are " + results); | ||||
|                         threadResults.put(threadName, !results.isEmpty()); | ||||
|                     } | ||||
|                     catch (Exception e) | ||||
|                     { | ||||
|                         // A thread that fails records no result, so the count assertions | ||||
|                         // below fail; the stack trace is printed to explain why. | ||||
|                         e.printStackTrace(); | ||||
|                     } | ||||
|                     System.out.println(threadName + " finished " + System.currentTimeMillis()); | ||||
|                 } | ||||
|             }); | ||||
|             workers[i].start(); | ||||
|         } | ||||
|  | ||||
|         // Wait for the workers to finish instead of polling the result map; the overall | ||||
|         // deadline keeps a hung extraction from blocking the test indefinitely. | ||||
|         final long deadline = System.currentTimeMillis() + MAX_WAIT_MS; | ||||
|         for (Thread worker : workers) | ||||
|         { | ||||
|             long remaining = deadline - System.currentTimeMillis(); | ||||
|             if (remaining <= 0) | ||||
|             { | ||||
|                 // join(0) would wait forever, so stop as soon as the budget is spent | ||||
|                 break; | ||||
|             } | ||||
|             worker.join(remaining); | ||||
|         } | ||||
|  | ||||
|         int succeeded = 0; | ||||
|         int failed = 0; | ||||
|         for (Boolean result : threadResults.values()) | ||||
|         { | ||||
|             if (result) | ||||
|             { | ||||
|                 succeeded++; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 failed++; | ||||
|             } | ||||
|         } | ||||
|         assertEquals("Wrong number of failed extractions.", | ||||
|                 NUMBER_OF_CONCURRENT_THREADS - MAX_CONCURRENT_EXTRACTIONS, | ||||
|                 failed); | ||||
|         assertEquals("Wrong number of successful extractions.", | ||||
|                 MAX_CONCURRENT_EXTRACTIONS, | ||||
|                 succeeded); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * PDF extracter that sleeps for a configurable time before delegating to the real | ||||
|      * extraction. | ||||
|      * <p> | ||||
|      * NOTE(review): a mapping properties file copied from PdfBoxMetadataExtracter accompanies | ||||
|      * this class; presumably it is looked up by class name, so confirm before renaming. | ||||
|      */ | ||||
|     private static class SlowPdfBoxMetadataExtracter extends PdfBoxMetadataExtracter | ||||
|     { | ||||
|         private long delay = 0; | ||||
|  | ||||
|         /** | ||||
|          * @param delay time in milliseconds to sleep before each raw extraction | ||||
|          */ | ||||
|         public void setDelay(long delay) | ||||
|         { | ||||
|             this.delay = delay; | ||||
|         } | ||||
|  | ||||
|         /** | ||||
|          * Sleeps for the configured delay, then performs the normal PDF extraction and prints | ||||
|          * the raw result. | ||||
|          */ | ||||
|         @Override | ||||
|         protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable | ||||
|         { | ||||
|             Thread.sleep(delay); | ||||
|             Map<String, Serializable> results = super.extractRaw(reader); | ||||
|             System.out.println(Thread.currentThread().getName() + " results are " + results); | ||||
|             return results; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -31,7 +31,6 @@ import java.io.Serializable; | ||||
| import java.util.Calendar; | ||||
| import java.util.HashMap; | ||||
| import java.util.Map; | ||||
| import java.util.concurrent.ConcurrentHashMap; | ||||
|  | ||||
| import org.alfresco.model.ContentModel; | ||||
| import org.alfresco.repo.content.MimetypeMap; | ||||
| @@ -124,60 +123,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest | ||||
|        assertEquals(58, c.get(Calendar.SECOND)); | ||||
|        //assertEquals(0, c.get(Calendar.MILLISECOND)); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * Starts more extraction threads than {@code MAX_CONCURENT_EXTRACTIONS} and checks that | ||||
|      * exactly that many obtain a non-empty result while the others obtain an empty one. | ||||
|      * | ||||
|      * @throws InterruptedException if the test thread is interrupted while waiting for the workers | ||||
|      */ | ||||
|     public void testConcurrentExtractions() throws InterruptedException | ||||
|     { | ||||
|         final int threadNum = 11; | ||||
|         // total wait budget in milliseconds, equal to the former 100 polls of 50 ms each | ||||
|         final long maxWaitMs = 100L * 50; | ||||
|         final Map<String, Boolean> threadResults = new ConcurrentHashMap<>(); | ||||
|         final Thread[] workers = new Thread[threadNum]; | ||||
|         for (int i = 0; i < threadNum; i++) | ||||
|         { | ||||
|             workers[i] = new Thread(new Runnable() | ||||
|             { | ||||
|                 @Override | ||||
|                 public void run() | ||||
|                 { | ||||
|                     try | ||||
|                     { | ||||
|                         Map<QName, Serializable> results = extractFromMimetype(MimetypeMap.MIMETYPE_PDF); | ||||
|                         if(!results.isEmpty()) | ||||
|                         { | ||||
|                             // delay successful transformations to help all threads to start in time | ||||
|                             Thread.sleep(5*threadNum); | ||||
|                         } | ||||
|                         threadResults.put(Thread.currentThread().getName(), !results.isEmpty()); | ||||
|                     } | ||||
|                     catch (Exception e) | ||||
|                     { | ||||
|                         // A thread that fails records no result, so the count assertions | ||||
|                         // below fail; the stack trace is printed to explain why. | ||||
|                         e.printStackTrace(); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|             }); | ||||
|             workers[i].start(); | ||||
|         } | ||||
|  | ||||
|         // Wait for the workers to finish instead of polling the result map; the overall | ||||
|         // deadline keeps a hung extraction from blocking the test indefinitely. | ||||
|         final long deadline = System.currentTimeMillis() + maxWaitMs; | ||||
|         for (Thread worker : workers) | ||||
|         { | ||||
|             long remaining = deadline - System.currentTimeMillis(); | ||||
|             if (remaining <= 0) | ||||
|             { | ||||
|                 // join(0) would wait forever, so stop as soon as the budget is spent | ||||
|                 break; | ||||
|             } | ||||
|             worker.join(remaining); | ||||
|         } | ||||
|  | ||||
|         int succeeded = 0; | ||||
|         int failed = 0; | ||||
|         for (Boolean result : threadResults.values()) | ||||
|         { | ||||
|             if (result) | ||||
|             { | ||||
|                 succeeded++; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 failed++; | ||||
|             } | ||||
|         } | ||||
|         assertEquals("Wrong number of failed extractions.", | ||||
|                 Integer.valueOf(threadNum - MAX_CONCURENT_EXTRACTIONS), | ||||
|                 Integer.valueOf(failed)); | ||||
|         assertEquals("Wrong number of successful extractions.", | ||||
|                 Integer.valueOf(MAX_CONCURENT_EXTRACTIONS), | ||||
|                 Integer.valueOf(succeeded)); | ||||
|     } | ||||
|  | ||||
|     public void testMaxDocumentSizeLimit() throws Exception | ||||
|     { | ||||
|   | ||||
| @@ -0,0 +1,11 @@ | ||||
| # | ||||
| # Copied from PdfBoxMetadataExtracter | ||||
| # | ||||
| # Namespaces | ||||
| namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 | ||||
|  | ||||
| # Mappings | ||||
| author=cm:author | ||||
| title=cm:title | ||||
| subject=cm:description | ||||
| created=cm:created | ||||
		Reference in New Issue
	
	Block a user