Beefed up unit tests for content metadata extracters

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2469 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley 2006-02-22 13:16:56 +00:00
parent 31d9ef768b
commit 349183a535
10 changed files with 125 additions and 131 deletions

View File

@ -102,6 +102,9 @@
<property name="registry"> <property name="registry">
<ref bean="metadataExtracterRegistry" /> <ref bean="metadataExtracterRegistry" />
</property> </property>
<property name="mimetypeService">
<ref bean="mimetypeService" />
</property>
</bean> </bean>
<!-- Content Metadata Extracters --> <!-- Content Metadata Extracters -->
@ -110,11 +113,7 @@
<bean class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" /> <bean class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" /> <bean class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" /> <bean class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" > <bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" />
<property name="mimetypeMap">
<ref bean="mimetypeService" />
</property>
</bean>
<!-- Content Transformation Registry --> <!-- Content Transformation Registry -->

View File

@ -24,6 +24,7 @@ import java.util.Set;
import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -36,6 +37,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{ {
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class); private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
private MimetypeService mimetypeService;
private MetadataExtracterRegistry registry; private MetadataExtracterRegistry registry;
private Set<String> supportedMimetypes; private Set<String> supportedMimetypes;
private double reliability; private double reliability;
@ -64,6 +66,24 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{ {
this.registry = registry; this.registry = registry;
} }
/**
* Helper setter of the mimetype service. This is not always required.
*
* @param mimetypeService
*/
public void setMimetypeService(MimetypeService mimetypeService)
{
this.mimetypeService = mimetypeService;
}
/**
* @return Returns the mimetype helper
*/
protected MimetypeService getMimetypeService()
{
return mimetypeService;
}
/** /**
* Registers this instance of the extracter with the registry. * Registers this instance of the extracter with the registry.
@ -74,7 +94,9 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{ {
if (registry == null) if (registry == null)
{ {
throw new IllegalArgumentException("Property 'registry' has not been set"); logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" extracter: " + this);
return;
} }
registry.register(this); registry.register(this);
} }
@ -129,15 +151,17 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
catch (Throwable e) catch (Throwable e)
{ {
throw new ContentIOException("Metadata extraction failed: \n" + throw new ContentIOException("Metadata extraction failed: \n" +
" reader: " + reader + "\n" + " reader: " + reader,
e); e);
} }
finally finally
{ {
// check that the reader and writer are both closed // check that the reader was closed
if (!reader.isClosed()) if (!reader.isClosed())
{ {
logger.error("Content reader not closed by metadata extracter: \n" + reader); logger.error("Content reader not closed by metadata extracter: \n" +
" reader: " + reader + "\n" +
" extracter: " + this);
} }
} }

View File

@ -18,40 +18,37 @@ package org.alfresco.repo.content.metadata;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.net.URL;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import junit.framework.TestCase;
import org.alfresco.model.ContentModel; import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader; import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.alfresco.util.BaseSpringTest; import org.alfresco.util.ApplicationContextHelper;
import org.alfresco.util.TempFileProvider; import org.alfresco.util.TempFileProvider;
import org.springframework.context.ApplicationContext;
/** /**
* Provides a base set of tests for testing * @see org.alfresco.repo.content.metadata.MetadataExtracter
* {@link org.alfresco.repo.content.metadata.MetadataExtracter} implementations. * @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
* *
* @author Jesper Steen Møller * @author Jesper Steen Møller
*/ */
public abstract class AbstractMetadataExtracterTest extends BaseSpringTest public abstract class AbstractMetadataExtracterTest extends TestCase
{ {
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog"; protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog"; protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
protected static final String QUICK_CREATOR = "Nevin Nollop"; protected static final String QUICK_CREATOR = "Nevin Nollop";
protected static final String[] QUICK_WORDS = new String[] { "quick", "brown", "fox", "jumps", "lazy", "dog" };
protected MimetypeMap mimetypeMap; protected MimetypeMap mimetypeMap;
protected MetadataExtracter transformer;
public final void setMimetypeMap(MimetypeMap mimetypeMap)
{
this.mimetypeMap = mimetypeMap;
}
protected abstract MetadataExtracter getExtracter(); protected abstract MetadataExtracter getExtracter();
@ -59,8 +56,10 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
* Ensures that the temp locations are cleaned out before the tests start * Ensures that the temp locations are cleaned out before the tests start
*/ */
@Override @Override
protected void onSetUpInTransaction() throws Exception public void setUp() throws Exception
{ {
this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
// perform a little cleaning up // perform a little cleaning up
long now = System.currentTimeMillis(); long now = System.currentTimeMillis();
TempFileProvider.TempFileCleanerJob.removeFiles(now); TempFileProvider.TempFileCleanerJob.removeFiles(now);
@ -73,40 +72,26 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
{ {
assertNotNull("MimetypeMap not present", mimetypeMap); assertNotNull("MimetypeMap not present", mimetypeMap);
// check that the quick resources are available // check that the quick resources are available
File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile("txt"); File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
assertNotNull("quick.* files should be available from Tests", sourceFile); assertNotNull("quick.* files should be available from Tests", sourceFile);
} }
/** protected void testExtractFromMimetype(String mimetype) throws Exception
* Helper method to load one of the "The quick brown fox" files from the
* classpath.
*
* @param extension the extension of the file required
* @return Returns a test resource loaded from the classpath or
* <tt>null</tt> if no resource could be found.
* @throws IOException
*/
public static File loadQuickTestFile(String extension) throws IOException
{ {
URL url = AbstractMetadataExtracterTest.class.getClassLoader().getResource("quick/quick." + extension); Map<QName, Serializable> properties = extractFromMimetype(mimetype);
if (url == null) // check
{ testCommonMetadata(mimetype, properties);
return null;
}
File file = new File(url.getFile());
if (!file.exists())
{
return null;
}
return file;
} }
public Map<QName, Serializable> extractFromExtension(String ext, String mimetype) throws Exception protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
{ {
Map<QName, Serializable> destination = new HashMap<QName, Serializable>(); Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
// get the extension for the mimetype
String ext = mimetypeMap.getExtension(mimetype);
// attempt to get a source file for each mimetype // attempt to get a source file for each mimetype
File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile(ext); File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
if (sourceFile == null) if (sourceFile == null)
{ {
throw new FileNotFoundException("No quick." + ext + " file found for test"); throw new FileNotFoundException("No quick." + ext + " file found for test");
@ -115,14 +100,17 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
// construct a reader onto the source file // construct a reader onto the source file
ContentReader sourceReader = new FileContentReader(sourceFile); ContentReader sourceReader = new FileContentReader(sourceFile);
sourceReader.setMimetype(mimetype); sourceReader.setMimetype(mimetype);
getExtracter().extract(sourceReader, destination); getExtracter().extract(sourceReader, properties);
return destination; return properties;
} }
public void testCommonMetadata(Map<QName, Serializable> destination) protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
{ {
assertEquals(QUICK_TITLE, destination.get(ContentModel.PROP_TITLE)); assertEquals(
assertEquals(QUICK_DESCRIPTION, destination.get(ContentModel.PROP_DESCRIPTION)); "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
assertEquals(QUICK_CREATOR, destination.get(ContentModel.PROP_AUTHOR)); QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
assertEquals(
"Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
} }
} }

View File

@ -25,8 +25,10 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{ {
private MetadataExtracter extracter; private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception @Override
public void setUp() throws Exception
{ {
super.setUp();
extracter = new HtmlMetadataExtracter(); extracter = new HtmlMetadataExtracter();
} }
@ -50,7 +52,6 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testHtmlExtraction() throws Exception public void testHtmlExtraction() throws Exception
{ {
testCommonMetadata(extractFromExtension("html", MimetypeMap.MIMETYPE_HTML)); testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
} }
} }

View File

@ -42,12 +42,14 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
*/ */
public class OfficeMetadataExtracter extends AbstractMetadataExtracter public class OfficeMetadataExtracter extends AbstractMetadataExtracter
{ {
private static String[] mimeTypes = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL, public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_PPT }; MimetypeMap.MIMETYPE_WORD,
MimetypeMap.MIMETYPE_EXCEL,
MimetypeMap.MIMETYPE_PPT };
public OfficeMetadataExtracter() public OfficeMetadataExtracter()
{ {
super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.0, 1000); super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
} }
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable

View File

@ -1,20 +1,19 @@
package org.alfresco.repo.content.metadata; package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/** /**
* @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter * @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
*
* @author Jesper Steen Møller * @author Jesper Steen Møller
*/ */
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{ {
private static final Log logger = LogFactory.getLog(OfficeMetadataExtracterTest.class);
private MetadataExtracter extracter; private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception @Override
public void setUp() throws Exception
{ {
super.setUp();
extracter = new OfficeMetadataExtracter(); extracter = new OfficeMetadataExtracter();
} }
@ -28,33 +27,21 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testReliability() throws Exception public void testReliability() throws Exception
{ {
double reliability = 0.0; for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN); {
assertEquals("Mimetype text should not be supported", 0.0, reliability); double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_WORD); }
assertEquals("Word should be supported", 1.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_EXCEL);
assertEquals("Excel should be supported", 1.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PPT);
assertEquals("PowerPoint should be supported", 1.0, reliability);
} }
public void testWordExtraction() throws Exception /**
* Test all the supported mimetypes
*/
public void testSupportedMimetypes() throws Exception
{ {
testCommonMetadata(extractFromExtension("doc", MimetypeMap.MIMETYPE_WORD)); for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
testExtractFromMimetype(mimetype);
}
} }
public void testExcelExtraction() throws Exception
{
testCommonMetadata(extractFromExtension("xls", MimetypeMap.MIMETYPE_EXCEL));
}
public void testPowerPointExtraction() throws Exception
{
testCommonMetadata(extractFromExtension("ppt", MimetypeMap.MIMETYPE_PPT));
}
} }

View File

@ -11,8 +11,10 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{ {
private MetadataExtracter extracter; private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception @Override
public void setUp() throws Exception
{ {
super.setUp();
extracter = new PdfBoxMetadataExtracter(); extracter = new PdfBoxMetadataExtracter();
} }
@ -36,6 +38,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testPdfExtraction() throws Exception public void testPdfExtraction() throws Exception
{ {
testCommonMetadata(extractFromExtension("pdf", MimetypeMap.MIMETYPE_PDF)); testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
} }
} }

View File

@ -41,35 +41,30 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime; import com.sun.star.uno.UnoRuntime;
/** /**
*
* @author Jesper Steen Møller * @author Jesper Steen Møller
*/ */
public class UnoMetadataExtracter extends AbstractMetadataExtracter public class UnoMetadataExtracter extends AbstractMetadataExtracter
{ {
private static String[] mimeTypes = new String[] { public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS,
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS
// Add the other OpenOffice.org stuff here // Add the other OpenOffice.org stuff here
// In fact, other types may apply as well, but should be counted as lower // In fact, other types may apply as well, but should be counted as lower
// quality since they involve conversion. // quality since they involve conversion.
}; };
private MimetypeMap mimetypeMap;
private String contentUrl; private String contentUrl;
private MyUnoConnection connection; private MyUnoConnection connection;
private boolean isConnected; private boolean isConnected;
public UnoMetadataExtracter() public UnoMetadataExtracter()
{ {
super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.00, 10000); super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING; this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
} }
public void setMimetypeMap(MimetypeMap mimetypeMap)
{
this.mimetypeMap = mimetypeMap;
}
/** /**
* *
* @param contentUrl the URL to connect to * @param contentUrl the URL to connect to
@ -115,7 +110,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// create temporary files to convert from and to // create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile( File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer_", "." "UnoContentTransformer_", "."
+ mimetypeMap.getExtension(sourceMimetype)); + getMimetypeService().getExtension(sourceMimetype));
// download the content from the source reader // download the content from the source reader
reader.getContent(tempFromFile); reader.getContent(tempFromFile);

View File

@ -16,7 +16,6 @@
*/ */
package org.alfresco.repo.content.metadata; package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
/** /**
* @author Jesper Steen Møller * @author Jesper Steen Møller
@ -25,10 +24,13 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
{ {
private UnoMetadataExtracter extracter; private UnoMetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception @Override
public void setUp() throws Exception
{ {
super.setUp();
extracter = new UnoMetadataExtracter(); extracter = new UnoMetadataExtracter();
extracter.setMimetypeMap(mimetypeMap); extracter.setMimetypeService(mimetypeMap);
extracter.init();
} }
/** /**
@ -46,34 +48,22 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
return; return;
} }
double reliability = 0.0; for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN); {
assertEquals("Mimetype text should not be supported", 0.0, reliability); double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT); }
assertEquals("OpenOffice 2.0 Writer (OpenDoc) should be supported", 1.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER);
assertEquals("OpenOffice 1.0 Writer should be supported", 1.0, reliability);
} }
public void testOOo20WriterExtraction() throws Exception public void testSupportedMimetypes() throws Exception
{ {
if (!extracter.isConnected()) if (!extracter.isConnected())
{ {
return; return;
} }
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
testCommonMetadata(extractFromExtension("odt", MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT));
}
public void testOOo10WriterExtraction() throws Exception
{
if (!extracter.isConnected())
{ {
return; testExtractFromMimetype(mimetype);
} }
testCommonMetadata(extractFromExtension("sxw", MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER));
} }
} }

View File

@ -113,9 +113,11 @@ public abstract class AbstractContentTransformer implements ContentTransformer
{ {
if (registry == null) if (registry == null)
{ {
if (logger.isDebugEnabled()) if (registry == null)
{ {
logger.debug("No registry assigned. Ignoring auto-registration."); logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" transformer: " + this);
return;
} }
return; return;
} }
@ -245,11 +247,15 @@ public abstract class AbstractContentTransformer implements ContentTransformer
// check that the reader and writer are both closed // check that the reader and writer are both closed
if (!reader.isClosed()) if (!reader.isClosed())
{ {
logger.error("Content reader not closed by transformer: \n" + reader); logger.error("Content reader not closed by transformer: \n" +
" reader: " + reader + "\n" +
" transformer: " + this);
} }
if (!writer.isClosed()) if (!writer.isClosed())
{ {
logger.error("Content writer not closed by transformer: \n" + writer); logger.error("Content writer not closed by transformer: \n" +
" writer: " + writer + "\n" +
" transformer: " + this);
} }
} }