Beefed up unit tests for content metadata extracters

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2469 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley 2006-02-22 13:16:56 +00:00
parent 31d9ef768b
commit 349183a535
10 changed files with 125 additions and 131 deletions

View File

@ -102,6 +102,9 @@
<property name="registry">
<ref bean="metadataExtracterRegistry" />
</property>
<property name="mimetypeService">
<ref bean="mimetypeService" />
</property>
</bean>
<!-- Content Metadata Extracters -->
@ -110,11 +113,7 @@
<bean class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" >
<property name="mimetypeMap">
<ref bean="mimetypeService" />
</property>
</bean>
<bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" />
<!-- Content Transformation Registry -->

View File

@ -24,6 +24,7 @@ import java.util.Set;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -36,6 +37,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
private MimetypeService mimetypeService;
private MetadataExtracterRegistry registry;
private Set<String> supportedMimetypes;
private double reliability;
@ -64,6 +66,24 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
this.registry = registry;
}
/**
* Helper setter of the mimetype service. This is not always required.
* <p>
* The supplied service is stored on this extracter and handed back to
* subclasses by {@link #getMimetypeService()}.
*
* @param mimetypeService the mimetype service to store on this extracter
*/
public void setMimetypeService(MimetypeService mimetypeService)
{
this.mimetypeService = mimetypeService;
}
/**
* Accessor for the mimetype service stored by
* {@link #setMimetypeService(MimetypeService)}.
* <p>
* NOTE(review): the backing field has no initializer in the visible code, so
* this presumably returns <tt>null</tt> when the setter was never called -
* confirm before dereferencing the result.
*
* @return Returns the mimetype helper
*/
protected MimetypeService getMimetypeService()
{
return mimetypeService;
}
/**
* Registers this instance of the extracter with the registry.
@ -74,7 +94,9 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
if (registry == null)
{
throw new IllegalArgumentException("Property 'registry' has not been set");
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" extracter: " + this);
return;
}
registry.register(this);
}
@ -129,15 +151,17 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
catch (Throwable e)
{
throw new ContentIOException("Metadata extraction failed: \n" +
" reader: " + reader + "\n" +
" reader: " + reader,
e);
}
finally
{
// check that the reader and writer are both closed
// check that the reader was closed
if (!reader.isClosed())
{
logger.error("Content reader not closed by metadata extracter: \n" + reader);
logger.error("Content reader not closed by metadata extracter: \n" +
" reader: " + reader + "\n" +
" extracter: " + this);
}
}

View File

@ -18,40 +18,37 @@ package org.alfresco.repo.content.metadata;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import junit.framework.TestCase;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.BaseSpringTest;
import org.alfresco.util.ApplicationContextHelper;
import org.alfresco.util.TempFileProvider;
import org.springframework.context.ApplicationContext;
/**
* Provides a base set of tests for testing
* {@link org.alfresco.repo.content.metadata.MetadataExtracter} implementations.
* @see org.alfresco.repo.content.metadata.MetadataExtracter
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
*
* @author Jesper Steen Møller
*/
public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
public abstract class AbstractMetadataExtracterTest extends TestCase
{
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
protected static final String QUICK_CREATOR = "Nevin Nollop";
protected static final String[] QUICK_WORDS = new String[] { "quick", "brown", "fox", "jumps", "lazy", "dog" };
protected MimetypeMap mimetypeMap;
protected MetadataExtracter transformer;
public final void setMimetypeMap(MimetypeMap mimetypeMap)
{
this.mimetypeMap = mimetypeMap;
}
protected abstract MetadataExtracter getExtracter();
@ -59,8 +56,10 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
* Ensures that the temp locations are cleaned out before the tests start
*/
@Override
protected void onSetUpInTransaction() throws Exception
public void setUp() throws Exception
{
this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
// perform a little cleaning up
long now = System.currentTimeMillis();
TempFileProvider.TempFileCleanerJob.removeFiles(now);
@ -73,40 +72,26 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
{
assertNotNull("MimetypeMap not present", mimetypeMap);
// check that the quick resources are available
File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile("txt");
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
assertNotNull("quick.* files should be available from Tests", sourceFile);
}
/**
* Helper method to load one of the "The quick brown fox" files from the
* classpath.
*
* @param extension the extension of the file required
* @return Returns a test resource loaded from the classpath or
* <tt>null</tt> if no resource could be found.
* @throws IOException
*/
public static File loadQuickTestFile(String extension) throws IOException
protected void testExtractFromMimetype(String mimetype) throws Exception
{
URL url = AbstractMetadataExtracterTest.class.getClassLoader().getResource("quick/quick." + extension);
if (url == null)
{
return null;
}
File file = new File(url.getFile());
if (!file.exists())
{
return null;
}
return file;
Map<QName, Serializable> properties = extractFromMimetype(mimetype);
// check
testCommonMetadata(mimetype, properties);
}
public Map<QName, Serializable> extractFromExtension(String ext, String mimetype) throws Exception
protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
{
Map<QName, Serializable> destination = new HashMap<QName, Serializable>();
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
// get the extension for the mimetype
String ext = mimetypeMap.getExtension(mimetype);
// attempt to get a source file for each mimetype
File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile(ext);
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
if (sourceFile == null)
{
throw new FileNotFoundException("No quick." + ext + " file found for test");
@ -115,14 +100,17 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
// construct a reader onto the source file
ContentReader sourceReader = new FileContentReader(sourceFile);
sourceReader.setMimetype(mimetype);
getExtracter().extract(sourceReader, destination);
return destination;
getExtracter().extract(sourceReader, properties);
return properties;
}
public void testCommonMetadata(Map<QName, Serializable> destination)
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
{
assertEquals(QUICK_TITLE, destination.get(ContentModel.PROP_TITLE));
assertEquals(QUICK_DESCRIPTION, destination.get(ContentModel.PROP_DESCRIPTION));
assertEquals(QUICK_CREATOR, destination.get(ContentModel.PROP_AUTHOR));
assertEquals(
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
assertEquals(
"Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
}
}

View File

@ -25,8 +25,10 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new HtmlMetadataExtracter();
}
@ -50,7 +52,6 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testHtmlExtraction() throws Exception
{
testCommonMetadata(extractFromExtension("html", MimetypeMap.MIMETYPE_HTML));
testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
}
}

View File

@ -42,12 +42,14 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
*/
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
{
private static String[] mimeTypes = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL,
MimetypeMap.MIMETYPE_PPT };
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_WORD,
MimetypeMap.MIMETYPE_EXCEL,
MimetypeMap.MIMETYPE_PPT };
public OfficeMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.0, 1000);
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
}
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable

View File

@ -1,20 +1,19 @@
package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
*
* @author Jesper Steen Møller
*/
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private static final Log logger = LogFactory.getLog(OfficeMetadataExtracterTest.class);
private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new OfficeMetadataExtracter();
}
@ -28,33 +27,21 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testReliability() throws Exception
{
double reliability = 0.0;
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
assertEquals("Mimetype text should not be supported", 0.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_WORD);
assertEquals("Word should be supported", 1.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_EXCEL);
assertEquals("Excel should be supported", 1.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PPT);
assertEquals("PowerPoint should be supported", 1.0, reliability);
for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
}
}
public void testWordExtraction() throws Exception
/**
* Test all the supported mimetypes
*/
public void testSupportedMimetypes() throws Exception
{
testCommonMetadata(extractFromExtension("doc", MimetypeMap.MIMETYPE_WORD));
for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
testExtractFromMimetype(mimetype);
}
}
public void testExcelExtraction() throws Exception
{
testCommonMetadata(extractFromExtension("xls", MimetypeMap.MIMETYPE_EXCEL));
}
public void testPowerPointExtraction() throws Exception
{
testCommonMetadata(extractFromExtension("ppt", MimetypeMap.MIMETYPE_PPT));
}
}

View File

@ -11,8 +11,10 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new PdfBoxMetadataExtracter();
}
@ -36,6 +38,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testPdfExtraction() throws Exception
{
testCommonMetadata(extractFromExtension("pdf", MimetypeMap.MIMETYPE_PDF));
testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
}
}

View File

@ -41,35 +41,30 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
*
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracter extends AbstractMetadataExtracter
{
private static String[] mimeTypes = new String[] {
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS,
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS
// Add the other OpenOffice.org stuff here
// In fact, other types may apply as well, but should be counted as lower
// quality since they involve conversion.
};
private MimetypeMap mimetypeMap;
private String contentUrl;
private MyUnoConnection connection;
private boolean isConnected;
public UnoMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.00, 10000);
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
}
public void setMimetypeMap(MimetypeMap mimetypeMap)
{
this.mimetypeMap = mimetypeMap;
}
/**
*
* @param contentUrl the URL to connect to
@ -115,7 +110,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer_", "."
+ mimetypeMap.getExtension(sourceMimetype));
+ getMimetypeService().getExtension(sourceMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);

View File

@ -16,7 +16,6 @@
*/
package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
/**
* @author Jesper Steen Møller
@ -25,10 +24,13 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private UnoMetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new UnoMetadataExtracter();
extracter.setMimetypeMap(mimetypeMap);
extracter.setMimetypeService(mimetypeMap);
extracter.init();
}
/**
@ -46,34 +48,22 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
return;
}
double reliability = 0.0;
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
assertEquals("Mimetype text should not be supported", 0.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT);
assertEquals("OpenOffice 2.0 Writer (OpenDoc) should be supported", 1.0, reliability);
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER);
assertEquals("OpenOffice 1.0 Writer should be supported", 1.0, reliability);
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
{
double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
}
}
public void testOOo20WriterExtraction() throws Exception
public void testSupportedMimetypes() throws Exception
{
if (!extracter.isConnected())
{
return;
}
testCommonMetadata(extractFromExtension("odt", MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT));
}
public void testOOo10WriterExtraction() throws Exception
{
if (!extracter.isConnected())
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
{
return;
testExtractFromMimetype(mimetype);
}
testCommonMetadata(extractFromExtension("sxw", MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER));
}
}

View File

@ -113,9 +113,11 @@ public abstract class AbstractContentTransformer implements ContentTransformer
{
if (registry == null)
{
if (logger.isDebugEnabled())
if (registry == null)
{
logger.debug("No registry assigned. Ignoring auto-registration.");
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" transformer: " + this);
return;
}
return;
}
@ -245,11 +247,15 @@ public abstract class AbstractContentTransformer implements ContentTransformer
// check that the reader and writer are both closed
if (!reader.isClosed())
{
logger.error("Content reader not closed by transformer: \n" + reader);
logger.error("Content reader not closed by transformer: \n" +
" reader: " + reader + "\n" +
" transformer: " + this);
}
if (!writer.isClosed())
{
logger.error("Content writer not closed by transformer: \n" + writer);
logger.error("Content writer not closed by transformer: \n" +
" writer: " + writer + "\n" +
" transformer: " + this);
}
}