From 349183a535b9ebb690718e545effee8e397d619c Mon Sep 17 00:00:00 2001 From: Derek Hulley Date: Wed, 22 Feb 2006 13:16:56 +0000 Subject: [PATCH] Beefed up unit tests for content metadata extracters git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2469 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- config/alfresco/content-services-context.xml | 9 +-- .../metadata/AbstractMetadataExtracter.java | 32 +++++++- .../AbstractMetadataExtracterTest.java | 80 ++++++++----------- .../metadata/HtmlMetadataExtracterTest.java | 7 +- .../metadata/OfficeMetadataExtracter.java | 8 +- .../metadata/OfficeMetadataExtracterTest.java | 47 ++++------- .../metadata/PdfBoxMetadataExtracterTest.java | 6 +- .../metadata/UnoMetadataExtracter.java | 17 ++-- .../metadata/UnoMetadataExtracterTest.java | 36 +++------ .../transform/AbstractContentTransformer.java | 14 +++- 10 files changed, 125 insertions(+), 131 deletions(-) diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml index 7fd5b8840f..8e639bef00 100644 --- a/config/alfresco/content-services-context.xml +++ b/config/alfresco/content-services-context.xml @@ -102,6 +102,9 @@ + + + @@ -110,11 +113,7 @@ - - - - - + diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java index f94cb5a529..3a8b8d4f3e 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java @@ -24,6 +24,7 @@ import java.util.Set; import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.MimetypeService; import org.alfresco.service.namespace.QName; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -36,6 +37,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter { private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class); + private MimetypeService mimetypeService; private MetadataExtracterRegistry registry; private Set supportedMimetypes; private double reliability; @@ -64,6 +66,24 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter { this.registry = registry; } + + /** + * Helper setter of the mimetype service. This is not always required. + * + * @param mimetypeService + */ + public void setMimetypeService(MimetypeService mimetypeService) + { + this.mimetypeService = mimetypeService; + } + + /** + * @return Returns the mimetype helper + */ + protected MimetypeService getMimetypeService() + { + return mimetypeService; + } /** * Registers this instance of the extracter with the registry. @@ -74,7 +94,9 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter { if (registry == null) { - throw new IllegalArgumentException("Property 'registry' has not been set"); + logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" + + " extracter: " + this); + return; } registry.register(this); } @@ -129,15 +151,17 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter catch (Throwable e) { throw new ContentIOException("Metadata extraction failed: \n" + - " reader: " + reader + "\n" + + " reader: " + reader, e); } finally { - // check that the reader and writer are both closed + // check that the reader was closed if (!reader.isClosed()) { - logger.error("Content reader not closed by metadata extracter: \n" + reader); + logger.error("Content reader not closed by metadata extracter: \n" + + " reader: " + reader + "\n" + + " extracter: " + this); } } diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java index bcddc285cf..51ead94f88 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java @@ -18,40 +18,37 @@ package org.alfresco.repo.content.metadata; import java.io.File; import java.io.FileNotFoundException; -import java.io.IOException; import java.io.Serializable; -import java.net.URL; import java.util.HashMap; import java.util.Map; +import junit.framework.TestCase; + import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.filestore.FileContentReader; +import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.namespace.QName; -import org.alfresco.util.BaseSpringTest; +import org.alfresco.util.ApplicationContextHelper; import org.alfresco.util.TempFileProvider; +import org.springframework.context.ApplicationContext; /** - * Provides a base set of tests for testing - * {@link org.alfresco.repo.content.metadata.MetadataExtracter} implementations. + * @see org.alfresco.repo.content.metadata.MetadataExtracter + * @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter * * @author Jesper Steen Møller */ -public abstract class AbstractMetadataExtracterTest extends BaseSpringTest +public abstract class AbstractMetadataExtracterTest extends TestCase { + private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext(); + protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog"; protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog"; protected static final String QUICK_CREATOR = "Nevin Nollop"; - protected static final String[] QUICK_WORDS = new String[] { "quick", "brown", "fox", "jumps", "lazy", "dog" }; protected MimetypeMap mimetypeMap; - protected MetadataExtracter transformer; - - public final void setMimetypeMap(MimetypeMap mimetypeMap) - { - this.mimetypeMap = mimetypeMap; - } protected abstract MetadataExtracter getExtracter(); @@ -59,8 +56,10 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest * Ensures that the temp locations are cleaned out before the tests start */ @Override - protected void onSetUpInTransaction() throws Exception + public void setUp() throws Exception { + this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService"); + // perform a little cleaning up long now = System.currentTimeMillis(); TempFileProvider.TempFileCleanerJob.removeFiles(now); @@ -73,40 +72,26 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest { assertNotNull("MimetypeMap not present", mimetypeMap); // check that the quick resources are available - File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile("txt"); + File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt"); assertNotNull("quick.* files should be available from Tests", sourceFile); } - - /** - * Helper method to load one of the "The quick brown fox" files from the - * classpath. - * - * @param extension the extension of the file required - * @return Returns a test resource loaded from the classpath or - * null if no resource could be found. - * @throws IOException - */ - public static File loadQuickTestFile(String extension) throws IOException + + protected void testExtractFromMimetype(String mimetype) throws Exception { - URL url = AbstractMetadataExtracterTest.class.getClassLoader().getResource("quick/quick." + extension); - if (url == null) - { - return null; - } - File file = new File(url.getFile()); - if (!file.exists()) - { - return null; - } - return file; + Map properties = extractFromMimetype(mimetype); + // check + testCommonMetadata(mimetype, properties); } - public Map extractFromExtension(String ext, String mimetype) throws Exception + protected Map extractFromMimetype(String mimetype) throws Exception { - Map destination = new HashMap(); + Map properties = new HashMap(); + + // get the extension for the mimetype + String ext = mimetypeMap.getExtension(mimetype); // attempt to get a source file for each mimetype - File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile(ext); + File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext); if (sourceFile == null) { throw new FileNotFoundException("No quick." + ext + " file found for test"); @@ -115,14 +100,17 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest // construct a reader onto the source file ContentReader sourceReader = new FileContentReader(sourceFile); sourceReader.setMimetype(mimetype); - getExtracter().extract(sourceReader, destination); - return destination; + getExtracter().extract(sourceReader, properties); + return properties; } - public void testCommonMetadata(Map destination) + protected void testCommonMetadata(String mimetype, Map properties) { - assertEquals(QUICK_TITLE, destination.get(ContentModel.PROP_TITLE)); - assertEquals(QUICK_DESCRIPTION, destination.get(ContentModel.PROP_DESCRIPTION)); - assertEquals(QUICK_CREATOR, destination.get(ContentModel.PROP_AUTHOR)); + assertEquals( + "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, + QUICK_TITLE, properties.get(ContentModel.PROP_TITLE)); + assertEquals( + "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype, + QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION)); } } diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java index 39f627d3b0..a4ed6efaba 100644 --- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java @@ -25,8 +25,10 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest { private MetadataExtracter extracter; - public void onSetUpInTransaction() throws Exception + @Override + public void setUp() throws Exception { + super.setUp(); extracter = new HtmlMetadataExtracter(); } @@ -50,7 +52,6 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest public void testHtmlExtraction() throws Exception { - testCommonMetadata(extractFromExtension("html", MimetypeMap.MIMETYPE_HTML)); + testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML); } - } diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java index 9f0917d50e..250f9bdfc2 100644 --- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java @@ -42,12 +42,14 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; */ public class OfficeMetadataExtracter extends AbstractMetadataExtracter { - private static String[] mimeTypes = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL, - MimetypeMap.MIMETYPE_PPT }; + public static String[] SUPPORTED_MIMETYPES = new String[] { + MimetypeMap.MIMETYPE_WORD, + MimetypeMap.MIMETYPE_EXCEL, + MimetypeMap.MIMETYPE_PPT }; public OfficeMetadataExtracter() { - super(new HashSet(Arrays.asList(mimeTypes)), 1.0, 1000); + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000); } public void extractInternal(ContentReader reader, final Map destination) throws Throwable diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java index 37f274a7df..2630ee4ab1 100644 --- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java @@ -1,20 +1,19 @@ package org.alfresco.repo.content.metadata; -import org.alfresco.repo.content.MimetypeMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; /** * @see org.alfresco.repo.content.transform.OfficeMetadataExtracter + * * @author Jesper Steen Møller */ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest { - private static final Log logger = LogFactory.getLog(OfficeMetadataExtracterTest.class); private MetadataExtracter extracter; - public void onSetUpInTransaction() throws Exception + @Override + public void setUp() throws Exception { + super.setUp(); extracter = new OfficeMetadataExtracter(); } @@ -28,33 +27,21 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest public void testReliability() throws Exception { - double reliability = 0.0; - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN); - assertEquals("Mimetype text should not be supported", 0.0, reliability); - - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_WORD); - assertEquals("Word should be supported", 1.0, reliability); - - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_EXCEL); - assertEquals("Excel should be supported", 1.0, reliability); - - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PPT); - assertEquals("PowerPoint should be supported", 1.0, reliability); + for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES) + { + double reliability = extracter.getReliability(mimetype); + assertTrue("Expected above zero reliability", reliability > 0.0); + } } - public void testWordExtraction() throws Exception + /** + * Test all the supported mimetypes + */ + public void testSupportedMimetypes() throws Exception { - testCommonMetadata(extractFromExtension("doc", MimetypeMap.MIMETYPE_WORD)); + for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES) + { + testExtractFromMimetype(mimetype); + } } - - public void testExcelExtraction() throws Exception - { - testCommonMetadata(extractFromExtension("xls", MimetypeMap.MIMETYPE_EXCEL)); - } - - public void testPowerPointExtraction() throws Exception - { - testCommonMetadata(extractFromExtension("ppt", MimetypeMap.MIMETYPE_PPT)); - } - } diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java index ddb3dd91cf..6b82efa45e 100644 --- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java @@ -11,8 +11,10 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest { private MetadataExtracter extracter; - public void onSetUpInTransaction() throws Exception + @Override + public void setUp() throws Exception { + super.setUp(); extracter = new PdfBoxMetadataExtracter(); } @@ -36,6 +38,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest public void testPdfExtraction() throws Exception { - testCommonMetadata(extractFromExtension("pdf", MimetypeMap.MIMETYPE_PDF)); + testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF); } } diff --git a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java index 5364f668b2..465d10e759 100644 --- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java @@ -41,35 +41,30 @@ import com.sun.star.ucb.XFileIdentifierConverter; import com.sun.star.uno.UnoRuntime; /** - * * @author Jesper Steen Møller */ public class UnoMetadataExtracter extends AbstractMetadataExtracter { - private static String[] mimeTypes = new String[] { - MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, + public static String[] SUPPORTED_MIMETYPES = new String[] { + MimetypeMap.MIMETYPE_STAROFFICE5_WRITER, + MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS, MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, + MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS // Add the other OpenOffice.org stuff here // In fact, other types may apply as well, but should be counted as lower // quality since they involve conversion. }; - private MimetypeMap mimetypeMap; private String contentUrl; private MyUnoConnection connection; private boolean isConnected; public UnoMetadataExtracter() { - super(new HashSet(Arrays.asList(mimeTypes)), 1.00, 10000); + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000); this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING; } - public void setMimetypeMap(MimetypeMap mimetypeMap) - { - this.mimetypeMap = mimetypeMap; - } - /** * * @param contentUrl the URL to connect to @@ -115,7 +110,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter // create temporary files to convert from and to File tempFromFile = TempFileProvider.createTempFile( "UnoContentTransformer_", "." - + mimetypeMap.getExtension(sourceMimetype)); + + getMimetypeService().getExtension(sourceMimetype)); // download the content from the source reader reader.getContent(tempFromFile); diff --git a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java index 474d9f7700..f43013c2a3 100644 --- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java @@ -16,7 +16,6 @@ */ package org.alfresco.repo.content.metadata; -import org.alfresco.repo.content.MimetypeMap; /** * @author Jesper Steen Møller @@ -25,10 +24,13 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest { private UnoMetadataExtracter extracter; - public void onSetUpInTransaction() throws Exception + @Override + public void setUp() throws Exception { + super.setUp(); extracter = new UnoMetadataExtracter(); - extracter.setMimetypeMap(mimetypeMap); + extracter.setMimetypeService(mimetypeMap); + extracter.init(); } /** @@ -46,34 +48,22 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest return; } - double reliability = 0.0; - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN); - assertEquals("Mimetype text should not be supported", 0.0, reliability); - - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT); - assertEquals("OpenOffice 2.0 Writer (OpenDoc) should be supported", 1.0, reliability); - - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER); - assertEquals("OpenOffice 1.0 Writer should be supported", 1.0, reliability); + for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES) + { + double reliability = extracter.getReliability(mimetype); + assertTrue("Expected above zero reliability", reliability > 0.0); + } } - public void testOOo20WriterExtraction() throws Exception + public void testSupportedMimetypes() throws Exception { if (!extracter.isConnected()) { return; } - - testCommonMetadata(extractFromExtension("odt", MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT)); - } - - public void testOOo10WriterExtraction() throws Exception - { - if (!extracter.isConnected()) + for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES) { - return; + testExtractFromMimetype(mimetype); } - - testCommonMetadata(extractFromExtension("sxw", MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER)); } } diff --git a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java index 934009cd08..e355888e5c 100644 --- a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java +++ b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java @@ -113,9 +113,11 @@ public abstract class AbstractContentTransformer implements ContentTransformer { if (registry == null) { - if (logger.isDebugEnabled()) + if (registry == null) { - logger.debug("No registry assigned. Ignoring auto-registration."); + logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" + + " transformer: " + this); + return; } return; } @@ -245,11 +247,15 @@ public abstract class AbstractContentTransformer implements ContentTransformer // check that the reader and writer are both closed if (!reader.isClosed()) { - logger.error("Content reader not closed by transformer: \n" + reader); + logger.error("Content reader not closed by transformer: \n" + + " reader: " + reader + "\n" + + " transformer: " + this); } if (!writer.isClosed()) { - logger.error("Content writer not closed by transformer: \n" + writer); + logger.error("Content writer not closed by transformer: \n" + + " writer: " + writer + "\n" + + " transformer: " + this); } }