diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml
index 7fd5b8840f..8e639bef00 100644
--- a/config/alfresco/content-services-context.xml
+++ b/config/alfresco/content-services-context.xml
@@ -102,6 +102,9 @@
+
+
+
@@ -110,11 +113,7 @@
-
+
diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
index f94cb5a529..3a8b8d4f3e 100644
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
@@ -24,6 +24,7 @@ import java.util.Set;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -36,6 +37,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
+ private MimetypeService mimetypeService;
private MetadataExtracterRegistry registry;
private Set supportedMimetypes;
private double reliability;
@@ -64,6 +66,24 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
this.registry = registry;
}
+
+ /**
+ * Sets the mimetype service exposed to subclasses via {@link #getMimetypeService()}. This is not always required.
+ *
+ * @param mimetypeService the service instance to hold; it is stored as given, without any validation
+ */
+ public void setMimetypeService(MimetypeService mimetypeService)
+ {
+ this.mimetypeService = mimetypeService;
+ }
+
+ /**
+ * @return Returns the mimetype service last given to the setter - presumably <tt>null</tt> if it was never set (the field has no initializer)
+ */
+ protected MimetypeService getMimetypeService()
+ {
+ return mimetypeService;
+ }
/**
* Registers this instance of the extracter with the registry.
@@ -74,7 +94,9 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
if (registry == null)
{
- throw new IllegalArgumentException("Property 'registry' has not been set");
+ logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
+ " extracter: " + this);
+ return;
}
registry.register(this);
}
@@ -129,15 +151,17 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
catch (Throwable e)
{
throw new ContentIOException("Metadata extraction failed: \n" +
- " reader: " + reader + "\n" +
+ " reader: " + reader,
e);
}
finally
{
- // check that the reader and writer are both closed
+ // check that the reader was closed
if (!reader.isClosed())
{
- logger.error("Content reader not closed by metadata extracter: \n" + reader);
+ logger.error("Content reader not closed by metadata extracter: \n" +
+ " reader: " + reader + "\n" +
+ " extracter: " + this);
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
index bcddc285cf..51ead94f88 100644
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
@@ -18,40 +18,37 @@ package org.alfresco.repo.content.metadata;
import java.io.File;
import java.io.FileNotFoundException;
-import java.io.IOException;
import java.io.Serializable;
-import java.net.URL;
import java.util.HashMap;
import java.util.Map;
+import junit.framework.TestCase;
+
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
+import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
-import org.alfresco.util.BaseSpringTest;
+import org.alfresco.util.ApplicationContextHelper;
import org.alfresco.util.TempFileProvider;
+import org.springframework.context.ApplicationContext;
/**
- * Provides a base set of tests for testing
- * {@link org.alfresco.repo.content.metadata.MetadataExtracter} implementations.
+ * @see org.alfresco.repo.content.metadata.MetadataExtracter
+ * @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
*
* @author Jesper Steen Møller
*/
-public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
+public abstract class AbstractMetadataExtracterTest extends TestCase
{
+ private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
+
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
protected static final String QUICK_CREATOR = "Nevin Nollop";
- protected static final String[] QUICK_WORDS = new String[] { "quick", "brown", "fox", "jumps", "lazy", "dog" };
protected MimetypeMap mimetypeMap;
- protected MetadataExtracter transformer;
-
- public final void setMimetypeMap(MimetypeMap mimetypeMap)
- {
- this.mimetypeMap = mimetypeMap;
- }
protected abstract MetadataExtracter getExtracter();
@@ -59,8 +56,10 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
* Ensures that the temp locations are cleaned out before the tests start
*/
@Override
- protected void onSetUpInTransaction() throws Exception
+ public void setUp() throws Exception
{
+ this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
+
// perform a little cleaning up
long now = System.currentTimeMillis();
TempFileProvider.TempFileCleanerJob.removeFiles(now);
@@ -73,40 +72,26 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
{
assertNotNull("MimetypeMap not present", mimetypeMap);
// check that the quick resources are available
- File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile("txt");
+ File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
assertNotNull("quick.* files should be available from Tests", sourceFile);
}
-
- /**
- * Helper method to load one of the "The quick brown fox" files from the
- * classpath.
- *
- * @param extension the extension of the file required
- * @return Returns a test resource loaded from the classpath or
- * null if no resource could be found.
- * @throws IOException
- */
- public static File loadQuickTestFile(String extension) throws IOException
+
+ protected void testExtractFromMimetype(String mimetype) throws Exception
{
- URL url = AbstractMetadataExtracterTest.class.getClassLoader().getResource("quick/quick." + extension);
- if (url == null)
- {
- return null;
- }
- File file = new File(url.getFile());
- if (!file.exists())
- {
- return null;
- }
- return file;
+ Map properties = extractFromMimetype(mimetype);
+ // check
+ testCommonMetadata(mimetype, properties);
}
- public Map extractFromExtension(String ext, String mimetype) throws Exception
+ protected Map extractFromMimetype(String mimetype) throws Exception
{
- Map destination = new HashMap();
+ Map properties = new HashMap();
+
+ // get the extension for the mimetype
+ String ext = mimetypeMap.getExtension(mimetype);
// attempt to get a source file for each mimetype
- File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile(ext);
+ File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
if (sourceFile == null)
{
throw new FileNotFoundException("No quick." + ext + " file found for test");
@@ -115,14 +100,17 @@ public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
// construct a reader onto the source file
ContentReader sourceReader = new FileContentReader(sourceFile);
sourceReader.setMimetype(mimetype);
- getExtracter().extract(sourceReader, destination);
- return destination;
+ getExtracter().extract(sourceReader, properties);
+ return properties;
}
- public void testCommonMetadata(Map destination)
+ protected void testCommonMetadata(String mimetype, Map properties)
{
- assertEquals(QUICK_TITLE, destination.get(ContentModel.PROP_TITLE));
- assertEquals(QUICK_DESCRIPTION, destination.get(ContentModel.PROP_DESCRIPTION));
- assertEquals(QUICK_CREATOR, destination.get(ContentModel.PROP_AUTHOR));
+ assertEquals(
+ "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
+ QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
+ assertEquals(
+ "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
+ QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
index 39f627d3b0..a4ed6efaba 100644
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
@@ -25,8 +25,10 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private MetadataExtracter extracter;
- public void onSetUpInTransaction() throws Exception
+ @Override
+ public void setUp() throws Exception
{
+ super.setUp();
extracter = new HtmlMetadataExtracter();
}
@@ -50,7 +52,6 @@ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testHtmlExtraction() throws Exception
{
- testCommonMetadata(extractFromExtension("html", MimetypeMap.MIMETYPE_HTML));
+ testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
}
-
}
diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
index 9f0917d50e..250f9bdfc2 100644
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
@@ -42,12 +42,15 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
*/
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
{
- private static String[] mimeTypes = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL,
- MimetypeMap.MIMETYPE_PPT };
+ /** Mimetypes handled by this extracter; this is the list handed to the superclass constructor. */
+ public static final String[] SUPPORTED_MIMETYPES = new String[] {
+ MimetypeMap.MIMETYPE_WORD,
+ MimetypeMap.MIMETYPE_EXCEL,
+ MimetypeMap.MIMETYPE_PPT };
public OfficeMetadataExtracter()
{
- super(new HashSet(Arrays.asList(mimeTypes)), 1.0, 1000);
+ super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
}
public void extractInternal(ContentReader reader, final Map destination) throws Throwable
diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
index 37f274a7df..2630ee4ab1 100644
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
@@ -1,20 +1,19 @@
package org.alfresco.repo.content.metadata;
-import org.alfresco.repo.content.MimetypeMap;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
/**
- * @see org.alfresco.repo.content.transform.OfficeMetadataExtracter
+ * @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
+ *
* @author Jesper Steen Møller
*/
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
- private static final Log logger = LogFactory.getLog(OfficeMetadataExtracterTest.class);
private MetadataExtracter extracter;
- public void onSetUpInTransaction() throws Exception
+ @Override
+ public void setUp() throws Exception
{
+ super.setUp();
extracter = new OfficeMetadataExtracter();
}
@@ -28,33 +27,21 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testReliability() throws Exception
{
- double reliability = 0.0;
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
- assertEquals("Mimetype text should not be supported", 0.0, reliability);
-
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_WORD);
- assertEquals("Word should be supported", 1.0, reliability);
-
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_EXCEL);
- assertEquals("Excel should be supported", 1.0, reliability);
-
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PPT);
- assertEquals("PowerPoint should be supported", 1.0, reliability);
+ for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
+ {
+ double reliability = extracter.getReliability(mimetype);
+ assertTrue("Expected above zero reliability", reliability > 0.0);
+ }
}
- public void testWordExtraction() throws Exception
+ /**
+ * Test all the supported mimetypes
+ */
+ public void testSupportedMimetypes() throws Exception
{
- testCommonMetadata(extractFromExtension("doc", MimetypeMap.MIMETYPE_WORD));
+ for (String mimetype : OfficeMetadataExtracter.SUPPORTED_MIMETYPES)
+ {
+ testExtractFromMimetype(mimetype);
+ }
}
-
- public void testExcelExtraction() throws Exception
- {
- testCommonMetadata(extractFromExtension("xls", MimetypeMap.MIMETYPE_EXCEL));
- }
-
- public void testPowerPointExtraction() throws Exception
- {
- testCommonMetadata(extractFromExtension("ppt", MimetypeMap.MIMETYPE_PPT));
- }
-
}
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
index ddb3dd91cf..6b82efa45e 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -11,8 +11,10 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private MetadataExtracter extracter;
- public void onSetUpInTransaction() throws Exception
+ @Override
+ public void setUp() throws Exception
{
+ super.setUp();
extracter = new PdfBoxMetadataExtracter();
}
@@ -36,6 +38,6 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
public void testPdfExtraction() throws Exception
{
- testCommonMetadata(extractFromExtension("pdf", MimetypeMap.MIMETYPE_PDF));
+ testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
index 5364f668b2..465d10e759 100644
--- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
@@ -41,35 +41,30 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
- *
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracter extends AbstractMetadataExtracter
{
- private static String[] mimeTypes = new String[] {
- MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
+ public static final String[] SUPPORTED_MIMETYPES = new String[] {
+ MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
+ MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS,
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
+ MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS
// Add the other OpenOffice.org stuff here
// In fact, other types may apply as well, but should be counted as lower
// quality since they involve conversion.
};
- private MimetypeMap mimetypeMap;
private String contentUrl;
private MyUnoConnection connection;
private boolean isConnected;
public UnoMetadataExtracter()
{
- super(new HashSet(Arrays.asList(mimeTypes)), 1.00, 10000);
+ super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
}
- public void setMimetypeMap(MimetypeMap mimetypeMap)
- {
- this.mimetypeMap = mimetypeMap;
- }
-
/**
*
* @param contentUrl the URL to connect to
@@ -115,7 +110,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer_", "."
- + mimetypeMap.getExtension(sourceMimetype));
+ + getMimetypeService().getExtension(sourceMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);
diff --git a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
index 474d9f7700..f43013c2a3 100644
--- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
@@ -16,7 +16,6 @@
*/
package org.alfresco.repo.content.metadata;
-import org.alfresco.repo.content.MimetypeMap;
/**
* @author Jesper Steen Møller
@@ -25,10 +24,13 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private UnoMetadataExtracter extracter;
- public void onSetUpInTransaction() throws Exception
+ @Override
+ public void setUp() throws Exception
{
+ super.setUp();
extracter = new UnoMetadataExtracter();
- extracter.setMimetypeMap(mimetypeMap);
+ extracter.setMimetypeService(mimetypeMap);
+ extracter.init();
}
/**
@@ -46,34 +48,22 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
return;
}
- double reliability = 0.0;
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
- assertEquals("Mimetype text should not be supported", 0.0, reliability);
-
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT);
- assertEquals("OpenOffice 2.0 Writer (OpenDoc) should be supported", 1.0, reliability);
-
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER);
- assertEquals("OpenOffice 1.0 Writer should be supported", 1.0, reliability);
+ for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
+ {
+ double reliability = extracter.getReliability(mimetype);
+ assertTrue("Expected above zero reliability", reliability > 0.0);
+ }
}
- public void testOOo20WriterExtraction() throws Exception
+ public void testSupportedMimetypes() throws Exception
{
if (!extracter.isConnected())
{
return;
}
-
- testCommonMetadata(extractFromExtension("odt", MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT));
- }
-
- public void testOOo10WriterExtraction() throws Exception
- {
- if (!extracter.isConnected())
+ for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
{
- return;
+ testExtractFromMimetype(mimetype);
}
-
- testCommonMetadata(extractFromExtension("sxw", MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER));
}
}
diff --git a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java
index 934009cd08..e355888e5c 100644
--- a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java
+++ b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformer.java
@@ -113,9 +113,8 @@ public abstract class AbstractContentTransformer implements ContentTransformer
{
if (registry == null)
{
- if (logger.isDebugEnabled())
- {
- logger.debug("No registry assigned. Ignoring auto-registration.");
- }
+ // a missing registry is not fatal: warn, then skip auto-registration
+ logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
+ " transformer: " + this);
return;
}
@@ -245,11 +244,15 @@ public abstract class AbstractContentTransformer implements ContentTransformer
// check that the reader and writer are both closed
if (!reader.isClosed())
{
- logger.error("Content reader not closed by transformer: \n" + reader);
+ logger.error("Content reader not closed by transformer: \n" +
+ " reader: " + reader + "\n" +
+ " transformer: " + this);
}
if (!writer.isClosed())
{
- logger.error("Content writer not closed by transformer: \n" + writer);
+ logger.error("Content writer not closed by transformer: \n" +
+ " writer: " + writer + "\n" +
+ " transformer: " + this);
}
}