mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-06-09 17:45:10 +00:00
Beefed up unit tests for content metadata extracters
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2469 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
parent
31d9ef768b
commit
349183a535
@ -102,6 +102,9 @@
|
|||||||
<property name="registry">
|
<property name="registry">
|
||||||
<ref bean="metadataExtracterRegistry" />
|
<ref bean="metadataExtracterRegistry" />
|
||||||
</property>
|
</property>
|
||||||
|
<property name="mimetypeService">
|
||||||
|
<ref bean="mimetypeService" />
|
||||||
|
</property>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<!-- Content Metadata Extracters -->
|
<!-- Content Metadata Extracters -->
|
||||||
@ -110,11 +113,7 @@
|
|||||||
<bean class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
|
<bean class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" >
|
<bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" />
|
||||||
<property name="mimetypeMap">
|
|
||||||
<ref bean="mimetypeService" />
|
|
||||||
</property>
|
|
||||||
</bean>
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Content Transformation Registry -->
|
<!-- Content Transformation Registry -->
|
||||||
|
@ -24,6 +24,7 @@ import java.util.Set;
|
|||||||
import org.alfresco.error.AlfrescoRuntimeException;
|
import org.alfresco.error.AlfrescoRuntimeException;
|
||||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
|
import org.alfresco.service.cmr.repository.MimetypeService;
|
||||||
import org.alfresco.service.namespace.QName;
|
import org.alfresco.service.namespace.QName;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
@ -36,6 +37,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
|
|||||||
{
|
{
|
||||||
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
|
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
|
||||||
|
|
||||||
|
private MimetypeService mimetypeService;
|
||||||
private MetadataExtracterRegistry registry;
|
private MetadataExtracterRegistry registry;
|
||||||
private Set<String> supportedMimetypes;
|
private Set<String> supportedMimetypes;
|
||||||
private double reliability;
|
private double reliability;
|
||||||
@ -64,6 +66,24 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
|
|||||||
{
|
{
|
||||||
this.registry = registry;
|
this.registry = registry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper setter of the mimetype service. This is not always required.
|
||||||
|
*
|
||||||
|
* @param mimetypeService
|
||||||
|
*/
|
||||||
|
public void setMimetypeService(MimetypeService mimetypeService)
|
||||||
|
{
|
||||||
|
this.mimetypeService = mimetypeService;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns the mimetype helper
|
||||||
|
*/
|
||||||
|
protected MimetypeService getMimetypeService()
|
||||||
|
{
|
||||||
|
return mimetypeService;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers this instance of the extracter with the registry.
|
* Registers this instance of the extracter with the registry.
|
||||||
@ -74,7 +94,9 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
|
|||||||
{
|
{
|
||||||
if (registry == null)
|
if (registry == null)
|
||||||
{
|
{
|
||||||
throw new IllegalArgumentException("Property 'registry' has not been set");
|
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
|
||||||
|
" extracter: " + this);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
registry.register(this);
|
registry.register(this);
|
||||||
}
|
}
|
||||||
@ -129,15 +151,17 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
|
|||||||
catch (Throwable e)
|
catch (Throwable e)
|
||||||
{
|
{
|
||||||
throw new ContentIOException("Metadata extraction failed: \n" +
|
throw new ContentIOException("Metadata extraction failed: \n" +
|
||||||
" reader: " + reader + "\n" +
|
" reader: " + reader,
|
||||||
e);
|
e);
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
// check that the reader and writer are both closed
|
// check that the reader was closed
|
||||||
if (!reader.isClosed())
|
if (!reader.isClosed())
|
||||||
{
|
{
|
||||||
logger.error("Content reader not closed by metadata extracter: \n" + reader);
|
logger.error("Content reader not closed by metadata extracter: \n" +
|
||||||
|
" reader: " + reader + "\n" +
|
||||||
|
" extracter: " + this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -18,40 +18,37 @@ package org.alfresco.repo.content.metadata;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.net.URL;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.alfresco.model.ContentModel;
|
import org.alfresco.model.ContentModel;
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
import org.alfresco.repo.content.filestore.FileContentReader;
|
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||||
|
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
import org.alfresco.service.namespace.QName;
|
import org.alfresco.service.namespace.QName;
|
||||||
import org.alfresco.util.BaseSpringTest;
|
import org.alfresco.util.ApplicationContextHelper;
|
||||||
import org.alfresco.util.TempFileProvider;
|
import org.alfresco.util.TempFileProvider;
|
||||||
|
import org.springframework.context.ApplicationContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides a base set of tests for testing
|
* @see org.alfresco.repo.content.metadata.MetadataExtracter
|
||||||
* {@link org.alfresco.repo.content.metadata.MetadataExtracter} implementations.
|
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
|
||||||
*
|
*
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
*/
|
*/
|
||||||
public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
|
public abstract class AbstractMetadataExtracterTest extends TestCase
|
||||||
{
|
{
|
||||||
|
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
|
||||||
|
|
||||||
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
|
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
|
||||||
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
|
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
|
||||||
protected static final String QUICK_CREATOR = "Nevin Nollop";
|
protected static final String QUICK_CREATOR = "Nevin Nollop";
|
||||||
protected static final String[] QUICK_WORDS = new String[] { "quick", "brown", "fox", "jumps", "lazy", "dog" };
|
|
||||||
|
|
||||||
protected MimetypeMap mimetypeMap;
|
protected MimetypeMap mimetypeMap;
|
||||||
protected MetadataExtracter transformer;
|
|
||||||
|
|
||||||
public final void setMimetypeMap(MimetypeMap mimetypeMap)
|
|
||||||
{
|
|
||||||
this.mimetypeMap = mimetypeMap;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected abstract MetadataExtracter getExtracter();
|
protected abstract MetadataExtracter getExtracter();
|
||||||
|
|
||||||
@ -59,8 +56,10 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
|
|||||||
* Ensures that the temp locations are cleaned out before the tests start
|
* Ensures that the temp locations are cleaned out before the tests start
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected void onSetUpInTransaction() throws Exception
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
|
this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
|
||||||
|
|
||||||
// perform a little cleaning up
|
// perform a little cleaning up
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
TempFileProvider.TempFileCleanerJob.removeFiles(now);
|
TempFileProvider.TempFileCleanerJob.removeFiles(now);
|
||||||
@ -73,40 +72,26 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
|
|||||||
{
|
{
|
||||||
assertNotNull("MimetypeMap not present", mimetypeMap);
|
assertNotNull("MimetypeMap not present", mimetypeMap);
|
||||||
// check that the quick resources are available
|
// check that the quick resources are available
|
||||||
File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile("txt");
|
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
|
||||||
assertNotNull("quick.* files should be available from Tests", sourceFile);
|
assertNotNull("quick.* files should be available from Tests", sourceFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
protected void testExtractFromMimetype(String mimetype) throws Exception
|
||||||
* Helper method to load one of the "The quick brown fox" files from the
|
|
||||||
* classpath.
|
|
||||||
*
|
|
||||||
* @param extension the extension of the file required
|
|
||||||
* @return Returns a test resource loaded from the classpath or
|
|
||||||
* <tt>null</tt> if no resource could be found.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public static File loadQuickTestFile(String extension) throws IOException
|
|
||||||
{
|
{
|
||||||
URL url = AbstractMetadataExtracterTest.class.getClassLoader().getResource("quick/quick." + extension);
|
Map<QName, Serializable> properties = extractFromMimetype(mimetype);
|
||||||
if (url == null)
|
// check
|
||||||
{
|
testCommonMetadata(mimetype, properties);
|
||||||
return null;
|
|
||||||
}
|
|
||||||
File file = new File(url.getFile());
|
|
||||||
if (!file.exists())
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return file;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<QName, Serializable> extractFromExtension(String ext, String mimetype) throws Exception
|
protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
|
||||||
{
|
{
|
||||||
Map<QName, Serializable> destination = new HashMap<QName, Serializable>();
|
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||||
|
|
||||||
|
// get the extension for the mimetype
|
||||||
|
String ext = mimetypeMap.getExtension(mimetype);
|
||||||
|
|
||||||
// attempt to get a source file for each mimetype
|
// attempt to get a source file for each mimetype
|
||||||
File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile(ext);
|
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
|
||||||
if (sourceFile == null)
|
if (sourceFile == null)
|
||||||
{
|
{
|
||||||
throw new FileNotFoundException("No quick." + ext + " file found for test");
|
throw new FileNotFoundException("No quick." + ext + " file found for test");
|
||||||
@ -115,14 +100,17 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
|
|||||||
// construct a reader onto the source file
|
// construct a reader onto the source file
|
||||||
ContentReader sourceReader = new FileContentReader(sourceFile);
|
ContentReader sourceReader = new FileContentReader(sourceFile);
|
||||||
sourceReader.setMimetype(mimetype);
|
sourceReader.setMimetype(mimetype);
|
||||||
getExtracter().extract(sourceReader, destination);
|
getExtracter().extract(sourceReader, properties);
|
||||||
return destination;
|
return properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCommonMetadata(Map<QName, Serializable> destination)
|
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
|
||||||
{
|
{
|
||||||
assertEquals(QUICK_TITLE, destination.get(ContentModel.PROP_TITLE));
|
assertEquals(
|
||||||
assertEquals(QUICK_DESCRIPTION, destination.get(ContentModel.PROP_DESCRIPTION));
|
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
|
||||||
assertEquals(QUICK_CREATOR, destination.get(ContentModel.PROP_AUTHOR));
|
QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
|
||||||
|
assertEquals(
|
||||||
|
"Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
|
||||||
|
QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,8 +25,10 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
{
|
{
|
||||||
private MetadataExtracter extracter;
|
private MetadataExtracter extracter;
|
||||||
|
|
||||||
public void onSetUpInTransaction() throws Exception
|
@Override
|
||||||
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
|
super.setUp();
|
||||||
extracter = new HtmlMetadataExtracter();
|
extracter = new HtmlMetadataExtracter();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,7 +52,6 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
|
|
||||||
public void testHtmlExtraction() throws Exception
|
public void testHtmlExtraction() throws Exception
|
||||||
{
|
{
|
||||||
testCommonMetadata(extractFromExtension("html", MimetypeMap.MIMETYPE_HTML));
|
testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -42,12 +42,14 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
|
|||||||
*/
|
*/
|
||||||
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
|
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
|
||||||
{
|
{
|
||||||
private static String[] mimeTypes = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL,
|
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||||
MimetypeMap.MIMETYPE_PPT };
|
MimetypeMap.MIMETYPE_WORD,
|
||||||
|
MimetypeMap.MIMETYPE_EXCEL,
|
||||||
|
MimetypeMap.MIMETYPE_PPT };
|
||||||
|
|
||||||
public OfficeMetadataExtracter()
|
public OfficeMetadataExtracter()
|
||||||
{
|
{
|
||||||
super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.0, 1000);
|
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
|
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
|
||||||
|
@ -1,20 +1,19 @@
|
|||||||
package org.alfresco.repo.content.metadata;
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
|
* @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
|
||||||
|
*
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
*/
|
*/
|
||||||
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
|
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||||
{
|
{
|
||||||
private static final Log logger = LogFactory.getLog(OfficeMetadataExtracterTest.class);
|
|
||||||
private MetadataExtracter extracter;
|
private MetadataExtracter extracter;
|
||||||
|
|
||||||
public void onSetUpInTransaction() throws Exception
|
@Override
|
||||||
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
|
super.setUp();
|
||||||
extracter = new OfficeMetadataExtracter();
|
extracter = new OfficeMetadataExtracter();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,33 +27,21 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
|
|
||||||
public void testReliability() throws Exception
|
public void testReliability() throws Exception
|
||||||
{
|
{
|
||||||
double reliability = 0.0;
|
for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
{
|
||||||
assertEquals("Mimetype text should not be supported", 0.0, reliability);
|
double reliability = extracter.getReliability(mimetype);
|
||||||
|
assertTrue("Expected above zero reliability", reliability > 0.0);
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_WORD);
|
}
|
||||||
assertEquals("Word should be supported", 1.0, reliability);
|
|
||||||
|
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_EXCEL);
|
|
||||||
assertEquals("Excel should be supported", 1.0, reliability);
|
|
||||||
|
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PPT);
|
|
||||||
assertEquals("PowerPoint should be supported", 1.0, reliability);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testWordExtraction() throws Exception
|
/**
|
||||||
|
* Test all the supported mimetypes
|
||||||
|
*/
|
||||||
|
public void testSupportedMimetypes() throws Exception
|
||||||
{
|
{
|
||||||
testCommonMetadata(extractFromExtension("doc", MimetypeMap.MIMETYPE_WORD));
|
for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||||
|
{
|
||||||
|
testExtractFromMimetype(mimetype);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExcelExtraction() throws Exception
|
|
||||||
{
|
|
||||||
testCommonMetadata(extractFromExtension("xls", MimetypeMap.MIMETYPE_EXCEL));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testPowerPointExtraction() throws Exception
|
|
||||||
{
|
|
||||||
testCommonMetadata(extractFromExtension("ppt", MimetypeMap.MIMETYPE_PPT));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -11,8 +11,10 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
{
|
{
|
||||||
private MetadataExtracter extracter;
|
private MetadataExtracter extracter;
|
||||||
|
|
||||||
public void onSetUpInTransaction() throws Exception
|
@Override
|
||||||
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
|
super.setUp();
|
||||||
extracter = new PdfBoxMetadataExtracter();
|
extracter = new PdfBoxMetadataExtracter();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -36,6 +38,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
|
|
||||||
public void testPdfExtraction() throws Exception
|
public void testPdfExtraction() throws Exception
|
||||||
{
|
{
|
||||||
testCommonMetadata(extractFromExtension("pdf", MimetypeMap.MIMETYPE_PDF));
|
testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,35 +41,30 @@ import com.sun.star.ucb.XFileIdentifierConverter;
|
|||||||
import com.sun.star.uno.UnoRuntime;
|
import com.sun.star.uno.UnoRuntime;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
*/
|
*/
|
||||||
public class UnoMetadataExtracter extends AbstractMetadataExtracter
|
public class UnoMetadataExtracter extends AbstractMetadataExtracter
|
||||||
{
|
{
|
||||||
private static String[] mimeTypes = new String[] {
|
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||||
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
|
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
|
||||||
|
MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS,
|
||||||
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
|
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
|
||||||
|
MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS
|
||||||
// Add the other OpenOffice.org stuff here
|
// Add the other OpenOffice.org stuff here
|
||||||
// In fact, other types may apply as well, but should be counted as lower
|
// In fact, other types may apply as well, but should be counted as lower
|
||||||
// quality since they involve conversion.
|
// quality since they involve conversion.
|
||||||
};
|
};
|
||||||
|
|
||||||
private MimetypeMap mimetypeMap;
|
|
||||||
private String contentUrl;
|
private String contentUrl;
|
||||||
private MyUnoConnection connection;
|
private MyUnoConnection connection;
|
||||||
private boolean isConnected;
|
private boolean isConnected;
|
||||||
|
|
||||||
public UnoMetadataExtracter()
|
public UnoMetadataExtracter()
|
||||||
{
|
{
|
||||||
super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.00, 10000);
|
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
|
||||||
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
|
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setMimetypeMap(MimetypeMap mimetypeMap)
|
|
||||||
{
|
|
||||||
this.mimetypeMap = mimetypeMap;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param contentUrl the URL to connect to
|
* @param contentUrl the URL to connect to
|
||||||
@ -115,7 +110,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
// create temporary files to convert from and to
|
// create temporary files to convert from and to
|
||||||
File tempFromFile = TempFileProvider.createTempFile(
|
File tempFromFile = TempFileProvider.createTempFile(
|
||||||
"UnoContentTransformer_", "."
|
"UnoContentTransformer_", "."
|
||||||
+ mimetypeMap.getExtension(sourceMimetype));
|
+ getMimetypeService().getExtension(sourceMimetype));
|
||||||
// download the content from the source reader
|
// download the content from the source reader
|
||||||
reader.getContent(tempFromFile);
|
reader.getContent(tempFromFile);
|
||||||
|
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content.metadata;
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Jesper Steen Møller
|
* @author Jesper Steen Møller
|
||||||
@ -25,10 +24,13 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
{
|
{
|
||||||
private UnoMetadataExtracter extracter;
|
private UnoMetadataExtracter extracter;
|
||||||
|
|
||||||
public void onSetUpInTransaction() throws Exception
|
@Override
|
||||||
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
|
super.setUp();
|
||||||
extracter = new UnoMetadataExtracter();
|
extracter = new UnoMetadataExtracter();
|
||||||
extracter.setMimetypeMap(mimetypeMap);
|
extracter.setMimetypeService(mimetypeMap);
|
||||||
|
extracter.init();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -46,34 +48,22 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
double reliability = 0.0;
|
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
{
|
||||||
assertEquals("Mimetype text should not be supported", 0.0, reliability);
|
double reliability = extracter.getReliability(mimetype);
|
||||||
|
assertTrue("Expected above zero reliability", reliability > 0.0);
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT);
|
}
|
||||||
assertEquals("OpenOffice 2.0 Writer (OpenDoc) should be supported", 1.0, reliability);
|
|
||||||
|
|
||||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER);
|
|
||||||
assertEquals("OpenOffice 1.0 Writer should be supported", 1.0, reliability);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOOo20WriterExtraction() throws Exception
|
public void testSupportedMimetypes() throws Exception
|
||||||
{
|
{
|
||||||
if (!extracter.isConnected())
|
if (!extracter.isConnected())
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||||
testCommonMetadata(extractFromExtension("odt", MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testOOo10WriterExtraction() throws Exception
|
|
||||||
{
|
|
||||||
if (!extracter.isConnected())
|
|
||||||
{
|
{
|
||||||
return;
|
testExtractFromMimetype(mimetype);
|
||||||
}
|
}
|
||||||
|
|
||||||
testCommonMetadata(extractFromExtension("sxw", MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -113,9 +113,11 @@ public abstract class AbstractContentTransformer implements ContentTransformer
|
|||||||
{
|
{
|
||||||
if (registry == null)
|
if (registry == null)
|
||||||
{
|
{
|
||||||
if (logger.isDebugEnabled())
|
if (registry == null)
|
||||||
{
|
{
|
||||||
logger.debug("No registry assigned. Ignoring auto-registration.");
|
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
|
||||||
|
" transformer: " + this);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -245,11 +247,15 @@ public abstract class AbstractContentTransformer implements ContentTransformer
|
|||||||
// check that the reader and writer are both closed
|
// check that the reader and writer are both closed
|
||||||
if (!reader.isClosed())
|
if (!reader.isClosed())
|
||||||
{
|
{
|
||||||
logger.error("Content reader not closed by transformer: \n" + reader);
|
logger.error("Content reader not closed by transformer: \n" +
|
||||||
|
" reader: " + reader + "\n" +
|
||||||
|
" transformer: " + this);
|
||||||
}
|
}
|
||||||
if (!writer.isClosed())
|
if (!writer.isClosed())
|
||||||
{
|
{
|
||||||
logger.error("Content writer not closed by transformer: \n" + writer);
|
logger.error("Content writer not closed by transformer: \n" +
|
||||||
|
" writer: " + writer + "\n" +
|
||||||
|
" transformer: " + this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user