diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml
index 7a97118f04..7fd5b8840f 100644
--- a/config/alfresco/content-services-context.xml
+++ b/config/alfresco/content-services-context.xml
@@ -87,27 +87,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -116,9 +124,7 @@
-
+
mimetypes;
+ private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
+
+ private MetadataExtracterRegistry registry;
+ private Set supportedMimetypes;
private double reliability;
private long extractionTime;
- protected AbstractMetadataExtracter(String mimetype, double reliability, long extractionTime)
+ protected AbstractMetadataExtracter(String supportedMimetype, double reliability, long extractionTime)
{
- this.mimetypes = Collections.singleton(mimetype);
+ this.supportedMimetypes = Collections.singleton(supportedMimetype);
this.reliability = reliability;
this.extractionTime = extractionTime;
}
- protected AbstractMetadataExtracter(Set mimetypes, double reliability, long extractionTime)
+ protected AbstractMetadataExtracter(Set supportedMimetypes, double reliability, long extractionTime)
{
- this.mimetypes = mimetypes;
+ this.supportedMimetypes = supportedMimetypes;
this.reliability = reliability;
this.extractionTime = extractionTime;
}
- public double getReliability(String sourceMimetype)
+ /**
+ * Set the registry to register with
+ *
+ * @param registry a metadata extracter registry
+ */
+ public void setRegistry(MetadataExtracterRegistry registry)
{
- if (mimetypes.contains(sourceMimetype))
+ this.registry = registry;
+ }
+
+ /**
+ * Adds this extracter to the registry that was supplied via
+ * {@link #setRegistry(MetadataExtracterRegistry)}.
+ *
+ * @throws IllegalArgumentException if no registry has been set
+ * @see #setRegistry(MetadataExtracterRegistry)
+ */
+ public void register()
+ {
+ MetadataExtracterRegistry target = registry;
+ if (target != null)
+ {
+ target.register(this);
+ return;
+ }
+ // nothing to register with
+ throw new IllegalArgumentException("Property 'registry' has not been set");
+ }
+
+ /**
+ * Default reliability check that returns the reliability as configured by the constructor
+ * if the mimetype is in the list of supported mimetypes.
+ *
+ * @param mimetype the mimetype to check
+ */
+ public double getReliability(String mimetype)
+ {
+ if (supportedMimetypes.contains(mimetype))
return reliability;
else
return 0.0;
@@ -60,7 +97,69 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
return extractionTime;
}
+
+ /**
+ * Verifies that this extracter can handle the reader's mimetype, i.e. that
+ * {@link #getReliability(String)} reports a value greater than zero for it.
+ *
+ * @param reader the reader whose mimetype is checked
+ * @throws AlfrescoRuntimeException if the reported reliability is zero or less
+ */
+ protected void checkReliability(ContentReader reader)
+ {
+ double reliabilityForReader = getReliability(reader.getMimetype());
+ if (reliabilityForReader <= 0.0)
+ {
+ // assemble the same diagnostic message piece by piece
+ StringBuilder msg = new StringBuilder("Metadata extracter does not support mimetype: \n");
+ msg.append(" reader: ").append(reader).append("\n");
+ msg.append(" supported: ").append(supportedMimetypes).append("\n");
+ msg.append(" extracter: ").append(this);
+ throw new AlfrescoRuntimeException(msg.toString());
+ }
+ }
+ /**
+ * Checks that the reader's mimetype is supported and then delegates the actual work to
+ * {@link #extractInternal(ContentReader, Map)}.
+ *
+ * Anything thrown by the delegate is wrapped in a {@link ContentIOException} that carries
+ * the original failure as its cause.  The delegate is expected to close the reader; if the
+ * reader is still open afterwards, an error is logged.
+ *
+ * @param reader the source of the content
+ * @param destination the property map to fill
+ * @throws ContentIOException if the delegate fails for any reason
+ * @throws AlfrescoRuntimeException if the reader's mimetype is not supported
+ */
+ public final void extract(ContentReader reader, Map destination) throws ContentIOException
+ {
+ // check the reliability
+ checkReliability(reader);
+
+ try
+ {
+ extractInternal(reader, destination);
+ }
+ catch (Throwable e)
+ {
+ // pass the failure on as the cause so that its stack trace is not lost
+ throw new ContentIOException("Metadata extraction failed: \n" +
+ " reader: " + reader,
+ e);
+ }
+ finally
+ {
+ // check that the reader has been closed
+ if (!reader.isClosed())
+ {
+ logger.error("Content reader not closed by metadata extracter: \n" + reader);
+ }
+ }
+
+ // done
+ if (logger.isDebugEnabled())
+ {
+ logger.debug("Completed metadata extraction: \n" +
+ " reader: " + reader + "\n" +
+ " extracter: " + this);
+ }
+ }
+
+ /**
+ * Override to provide the necessary extraction logic. Implementations must ensure that the reader
+ * is closed before the method exits.
+ *
+ * @param reader the source of the content
+ * @param destination the property map to fill
+ * @throws Throwable an exception
+ */
+ protected abstract void extractInternal(ContentReader reader, Map destination) throws Throwable;
+
/**
* Examines a value or string for nulls and adds it to the map (if
* non-empty)
diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
index 84021d805f..63b731e3c2 100644
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
@@ -16,7 +16,6 @@
*/
package org.alfresco.repo.content.metadata;
-import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
@@ -34,7 +33,6 @@ import javax.swing.text.html.parser.ParserDelegator;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
@@ -56,122 +54,116 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
super(MIMETYPES, 1.0, 1000);
}
- public void extract(ContentReader reader, Map destination) throws ContentIOException
+ public void extractInternal(ContentReader reader, Map destination) throws Throwable
{
final Map tempDestination = new HashMap();
- try
+
+ HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
{
- HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
+ StringBuffer title = null;
+ boolean inHead = false;
+
+ public void handleText(char[] data, int pos)
{
- StringBuffer title = null;
- boolean inHead = false;
-
- public void handleText(char[] data, int pos)
+ if (title != null)
{
- if (title != null)
- {
- title.append(data);
- }
- }
-
- public void handleComment(char[] data, int pos)
- {
- // Perhaps sniff for Office 9+ metadata in here?
- }
-
- public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
- {
- if (HTML.Tag.HEAD.equals(t))
- {
- inHead = true;
- }
- else if (HTML.Tag.TITLE.equals(t) && inHead)
- {
- title = new StringBuffer();
- }
- else
- handleSimpleTag(t, a, pos);
- }
-
- public void handleEndTag(HTML.Tag t, int pos)
- {
- if (HTML.Tag.HEAD.equals(t))
- {
- inHead = false;
- }
- else if (HTML.Tag.TITLE.equals(t) && title != null)
- {
- trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
- title = null;
- }
- }
-
- public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
- {
- if (HTML.Tag.META.equals(t))
- {
- Object nameO = a.getAttribute(HTML.Attribute.NAME);
- Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
- if (nameO == null || valueO == null)
- return;
-
- String name = nameO.toString();
-
- if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
- || name.equalsIgnoreCase("dc.creator"))
- {
- trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
- }
- if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
- {
- trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
- }
- }
- }
-
- public void handleError(String errorMsg, int pos)
- {
- }
- };
-
- String charsetGuess = "UTF-8";
- int tries = 0;
- while (tries < 3)
- {
- tempDestination.clear();
- Reader r = null;
- InputStream cis = null;
- try
- {
- cis = reader.getContentInputStream();
- // TODO: for now, use default charset; we should attempt to map from html meta-data
- r = new InputStreamReader(cis);
- HTMLEditorKit.Parser parser = new ParserDelegator();
- parser.parse(r, callback, tries > 0);
- destination.putAll(tempDestination);
- break;
- }
- catch (ChangedCharSetException ccse)
- {
- tries++;
- charsetGuess = ccse.getCharSetSpec();
- int begin = charsetGuess.indexOf("charset=");
- if (begin > 0)
- charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length());
- reader = reader.getReader();
- }
- finally
- {
- if (r != null)
- r.close();
- if (cis != null)
- cis.close();
+ title.append(data);
}
}
- }
- catch (IOException e)
+
+ public void handleComment(char[] data, int pos)
+ {
+ // Perhaps sniff for Office 9+ metadata in here?
+ }
+
+ public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
+ {
+ if (HTML.Tag.HEAD.equals(t))
+ {
+ inHead = true;
+ }
+ else if (HTML.Tag.TITLE.equals(t) && inHead)
+ {
+ title = new StringBuffer();
+ }
+ else
+ handleSimpleTag(t, a, pos);
+ }
+
+ public void handleEndTag(HTML.Tag t, int pos)
+ {
+ if (HTML.Tag.HEAD.equals(t))
+ {
+ inHead = false;
+ }
+ else if (HTML.Tag.TITLE.equals(t) && title != null)
+ {
+ trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
+ title = null;
+ }
+ }
+
+ public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
+ {
+ if (HTML.Tag.META.equals(t))
+ {
+ Object nameO = a.getAttribute(HTML.Attribute.NAME);
+ Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
+ if (nameO == null || valueO == null)
+ return;
+
+ String name = nameO.toString();
+
+ if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
+ || name.equalsIgnoreCase("dc.creator"))
+ {
+ trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
+ }
+ if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
+ {
+ trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
+ }
+ }
+ }
+
+ public void handleError(String errorMsg, int pos)
+ {
+ }
+ };
+
+ String charsetGuess = "UTF-8";
+ int tries = 0;
+ while (tries < 3)
{
- throw new ContentIOException("HTML metadata extraction failed: \n" + " reader: " + reader, e);
+ tempDestination.clear();
+ Reader r = null;
+ InputStream cis = null;
+ try
+ {
+ cis = reader.getContentInputStream();
+ // TODO: for now, use default charset; we should attempt to map from html meta-data
+ r = new InputStreamReader(cis);
+ HTMLEditorKit.Parser parser = new ParserDelegator();
+ parser.parse(r, callback, tries > 0);
+ destination.putAll(tempDestination);
+ break;
+ }
+ catch (ChangedCharSetException ccse)
+ {
+ tries++;
+ charsetGuess = ccse.getCharSetSpec();
+ int begin = charsetGuess.indexOf("charset=");
+ if (begin > 0)
+ charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length());
+ reader = reader.getReader();
+ }
+ finally
+ {
+ if (r != null)
+ r.close();
+ if (cis != null)
+ cis.close();
+ }
}
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
index 49acfb2026..39f627d3b0 100644
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
@@ -17,16 +17,12 @@
package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
/**
- * @see org.alfresco.repo.content.transform.OfficeMetadataExtracter
* @author Jesper Steen Møller
*/
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{
- private static final Log logger = LogFactory.getLog(HtmlMetadataExtracterTest.class);
private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception
diff --git a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java
index e0ced6bb33..e24e6bd19c 100644
--- a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java
@@ -17,20 +17,17 @@
package org.alfresco.repo.content.metadata;
import java.io.File;
-import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
-import org.alfresco.util.GUID;
+import org.alfresco.util.TempFileProvider;
import org.farng.mp3.AbstractMP3FragmentBody;
import org.farng.mp3.MP3File;
-import org.farng.mp3.TagException;
import org.farng.mp3.id3.AbstractID3v2;
import org.farng.mp3.id3.AbstractID3v2Frame;
import org.farng.mp3.id3.ID3v1;
@@ -58,103 +55,88 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
super(MimetypeMap.MIMETYPE_MP3, 1.0, 1000);
}
- /**
- * @see org.alfresco.repo.content.metadata.MetadataExtracter#extract(org.alfresco.service.cmr.repository.ContentReader, java.util.Map)
- */
- public void extract(ContentReader reader,
- Map destination) throws ContentIOException
+ public void extractInternal(
+ ContentReader reader,
+ Map destination) throws Throwable
{
+ Map props = new HashMap();
+
+ // Create a temp file
+ File tempFile = TempFileProvider.createTempFile("MP3MetadataExtracter_", ".tmp");
try
{
- Map props = new HashMap();
+ reader.getContent(tempFile);
- // Create a temp file
- File tempFile = File.createTempFile(GUID.generate(), ".tmp");
- try
+ // Create the MP3 object from the file
+ MP3File mp3File = new MP3File(tempFile);
+
+ ID3v1 id3v1 = mp3File.getID3v1Tag();
+ if (id3v1 != null)
{
- reader.getContent(tempFile);
+ setTagValue(props, PROP_ALBUM_TITLE, id3v1.getAlbum());
+ setTagValue(props, PROP_SONG_TITLE, id3v1.getTitle());
+ setTagValue(props, PROP_ARTIST, id3v1.getArtist());
+ setTagValue(props, PROP_COMMENT, id3v1.getComment());
+ setTagValue(props, PROP_YEAR_RELEASED, id3v1.getYear());
- // Create the MP3 object from the file
- MP3File mp3File = new MP3File(tempFile);
+ // TODO sort out the genre
+ //setTagValue(props, MusicModel.PROP_GENRE, id3v1.getGenre());
- ID3v1 id3v1 = mp3File.getID3v1Tag();
- if (id3v1 != null)
- {
- setTagValue(props, PROP_ALBUM_TITLE, id3v1.getAlbum());
- setTagValue(props, PROP_SONG_TITLE, id3v1.getTitle());
- setTagValue(props, PROP_ARTIST, id3v1.getArtist());
- setTagValue(props, PROP_COMMENT, id3v1.getComment());
- setTagValue(props, PROP_YEAR_RELEASED, id3v1.getYear());
-
- // TODO sort out the genre
- //setTagValue(props, MusicModel.PROP_GENRE, id3v1.getGenre());
-
- // TODO sort out the size
- //setTagValue(props, MusicModel.PROP_SIZE, id3v1.getSize());
- }
-
- AbstractID3v2 id3v2 = mp3File.getID3v2Tag();
- if (id3v2 != null)
- {
- setTagValue(props, PROP_SONG_TITLE, getID3V2Value(id3v2, "TIT2"));
- setTagValue(props, PROP_ARTIST, getID3V2Value(id3v2, "TPE1"));
- setTagValue(props, PROP_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"));
- setTagValue(props, PROP_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"));
- setTagValue(props, PROP_COMMENT, getID3V2Value(id3v2, "COMM"));
- setTagValue(props, PROP_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"));
- setTagValue(props, PROP_GENRE, getID3V2Value(id3v2, "TCON"));
- setTagValue(props, PROP_COMPOSER, getID3V2Value(id3v2, "TCOM"));
-
- // TODO sort out the lyrics
- //System.out.println("Lyrics: " + getID3V2Value(id3v2, "SYLT"));
- //System.out.println("Lyrics: " + getID3V2Value(id3v2, "USLT"));
- }
-
- AbstractLyrics3 lyrics3Tag = mp3File.getLyrics3Tag();
- if (lyrics3Tag != null)
- {
- System.out.println("Lyrics3 tag found.");
- if (lyrics3Tag instanceof Lyrics3v2)
- {
- setTagValue(props, PROP_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"));
- setTagValue(props, PROP_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"));
- setTagValue(props, PROP_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"));
- setTagValue(props, PROP_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"));
- setTagValue(props, PROP_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"));
- setTagValue(props, PROP_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"));
- }
- }
-
- }
- finally
- {
- tempFile.delete();
+ // TODO sort out the size
+ //setTagValue(props, MusicModel.PROP_SIZE, id3v1.getSize());
}
- // Set the destination values
- if (props.get(PROP_SONG_TITLE) != null)
+ AbstractID3v2 id3v2 = mp3File.getID3v2Tag();
+ if (id3v2 != null)
{
- destination.put(ContentModel.PROP_TITLE, props.get(PROP_SONG_TITLE));
+ setTagValue(props, PROP_SONG_TITLE, getID3V2Value(id3v2, "TIT2"));
+ setTagValue(props, PROP_ARTIST, getID3V2Value(id3v2, "TPE1"));
+ setTagValue(props, PROP_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"));
+ setTagValue(props, PROP_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"));
+ setTagValue(props, PROP_COMMENT, getID3V2Value(id3v2, "COMM"));
+ setTagValue(props, PROP_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"));
+ setTagValue(props, PROP_GENRE, getID3V2Value(id3v2, "TCON"));
+ setTagValue(props, PROP_COMPOSER, getID3V2Value(id3v2, "TCOM"));
+
+ // TODO sort out the lyrics
+ //System.out.println("Lyrics: " + getID3V2Value(id3v2, "SYLT"));
+ //System.out.println("Lyrics: " + getID3V2Value(id3v2, "USLT"));
}
- if (props.get(PROP_ARTIST) != null)
+
+ AbstractLyrics3 lyrics3Tag = mp3File.getLyrics3Tag();
+ if (lyrics3Tag != null)
{
- destination.put(ContentModel.PROP_AUTHOR, props.get(PROP_ARTIST));
+ System.out.println("Lyrics3 tag found.");
+ if (lyrics3Tag instanceof Lyrics3v2)
+ {
+ setTagValue(props, PROP_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"));
+ setTagValue(props, PROP_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"));
+ setTagValue(props, PROP_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"));
+ setTagValue(props, PROP_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"));
+ setTagValue(props, PROP_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"));
+ setTagValue(props, PROP_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"));
+ }
}
- String description = getDescription(props);
- if (description != null)
- {
- destination.put(ContentModel.PROP_DESCRIPTION, description);
- }
- }
- catch (IOException ioException)
- {
- // TODO sort out exception handling
- throw new RuntimeException("Error reading mp3 file.", ioException);
+
}
- catch (TagException tagException)
+ finally
{
- // TODO sort out exception handling
- throw new RuntimeException("Error reading mp3 tag information.", tagException);
+ tempFile.delete();
+ }
+
+ // Set the destination values
+ if (props.get(PROP_SONG_TITLE) != null)
+ {
+ destination.put(ContentModel.PROP_TITLE, props.get(PROP_SONG_TITLE));
+ }
+ if (props.get(PROP_ARTIST) != null)
+ {
+ destination.put(ContentModel.PROP_AUTHOR, props.get(PROP_ARTIST));
+ }
+ String description = getDescription(props);
+ if (description != null)
+ {
+ destination.put(ContentModel.PROP_DESCRIPTION, description);
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
index 53940a390a..0a3fd4fe1a 100644
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
@@ -16,7 +16,7 @@
*/
package org.alfresco.repo.content.metadata;
-import java.util.Collections;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -28,7 +28,6 @@ import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.springframework.util.Assert;
/**
* Holds and provides the most appropriate metadata extracter for a particular
@@ -52,15 +51,10 @@ public class MetadataExtracterRegistry
/** controls write access to the cache */
private Lock extracterCacheWriteLock;
- /**
- * @param mimetypeMap all the mimetypes available to the system
- */
- public MetadataExtracterRegistry(MimetypeMap mimetypeMap)
+ public MetadataExtracterRegistry()
{
- Assert.notNull(mimetypeMap, "The MimetypeMap is mandatory");
- this.mimetypeMap = mimetypeMap;
-
- extracters = Collections.emptyList(); // just in case it isn't set
+ // initialise lists
+ extracters = new ArrayList(10);
extracterCache = new HashMap(17);
// create lock objects for access to the cache
@@ -69,6 +63,40 @@ public class MetadataExtracterRegistry
extracterCacheWriteLock = extractionCacheLock.writeLock();
}
+ /**
+ * The mimetype map that will be used to check requests against
+ *
+ * @param mimetypeMap a map of mimetypes
+ */
+ public void setMimetypeMap(MimetypeMap mimetypeMap)
+ {
+ this.mimetypeMap = mimetypeMap;
+ }
+
+ /**
+ * Register an instance of an extracter for use.  The extracter is appended to the list of
+ * known extracters and the extracter cache is emptied; both steps happen while the cache
+ * write lock is held.
+ *
+ * @param extracter an extracter
+ */
+ public void register(MetadataExtracter extracter)
+ {
+ if (logger.isDebugEnabled())
+ {
+ logger.debug("Registering metadata extracter: " + extracter);
+ }
+
+ extracterCacheWriteLock.lock();
+ try
+ {
+ extracters.add(extracter);
+ // NOTE(review): presumably cleared so that cached selections are recomputed with the new extracter in play - confirm
+ extracterCache.clear();
+ }
+ finally
+ {
+ // always release the lock, even if the list or cache update fails
+ extracterCacheWriteLock.unlock();
+ }
+ }
+
/**
* Gets the best metadata extracter. This is a combination of the most
* reliable and the most performant extracter.
@@ -123,8 +151,8 @@ public class MetadataExtracterRegistry
/**
* @param sourceMimetype The MIME type under examination
- * @return The fastest of the most reliable extracters in
- * extracters
for the given MIME type.
+ * @return The fastest of the most reliable extracters in extracters
+ * for the given MIME type, or null if none is available.
*/
private MetadataExtracter findBestExtracter(String sourceMimetype)
{
@@ -137,7 +165,12 @@ public class MetadataExtracterRegistry
for (MetadataExtracter ext : extracters)
{
double r = ext.getReliability(sourceMimetype);
- if (r == bestReliability)
+ if (r <= 0.0)
+ {
+ // extraction not achievable
+ continue;
+ }
+ else if (r == bestReliability)
{
long time = ext.getExtractionTime();
if (time < bestTime)
@@ -155,26 +188,4 @@ public class MetadataExtracterRegistry
}
return bestExtracter;
}
-
- /**
- * Provides a list of self-discovering extracters.
- *
- * @param transformers all the available extracters that the registry can
- * work with
- */
- public void setExtracters(List extracters)
- {
- logger.debug("Setting " + extracters.size() + "new extracters.");
-
- extracterCacheWriteLock.lock();
- try
- {
- this.extracters = extracters;
- this.extracterCache.clear();
- }
- finally
- {
- extracterCacheWriteLock.unlock();
- }
- }
}
\ No newline at end of file
diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
index 1c5b234ceb..9f0917d50e 100644
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
@@ -50,7 +50,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
super(new HashSet(Arrays.asList(mimeTypes)), 1.0, 1000);
}
- public void extract(ContentReader reader, final Map destination) throws ContentIOException
+ public void extractInternal(ContentReader reader, final Map destination) throws Throwable
{
POIFSReaderListener readerListener = new POIFSReaderListener()
{
@@ -96,12 +96,6 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
poiFSReader.read(is);
}
- catch (IOException e)
- {
- throw new ContentIOException("Compound Document SummaryInformation metadata extraction failed: \n"
- + " reader: " + reader,
- e);
- }
finally
{
if (is != null)
diff --git a/source/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
index 0364ad9b55..302518f98e 100644
--- a/source/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
@@ -16,6 +16,8 @@
*/
package org.alfresco.repo.content.metadata;
+import java.io.IOException;
+import java.io.InputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
@@ -23,11 +25,8 @@ import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import com.catcode.odf.ODFMetaFileAnalyzer;
import com.catcode.odf.OpenDocumentMetadata;
@@ -41,8 +40,6 @@ import com.catcode.odf.OpenDocumentMetadata;
*/
public class OpenDocumentMetadataExtracter extends AbstractMetadataExtracter
{
- private static final Log logger = LogFactory.getLog(OpenDocumentMetadataExtracter.class);
-
private static String[] mimeTypes = new String[] {
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE,
@@ -67,13 +64,15 @@ public class OpenDocumentMetadataExtracter extends AbstractMetadataExtracter
super(new HashSet(Arrays.asList(mimeTypes)), 1.00, 1000);
}
- public void extract(ContentReader reader, Map destination) throws ContentIOException
+ public void extractInternal(ContentReader reader, Map destination) throws Throwable
{
ODFMetaFileAnalyzer analyzer = new ODFMetaFileAnalyzer();
+ InputStream is = null;
try
{
+ is = reader.getContentInputStream();
// stream the document in
- OpenDocumentMetadata docInfo = analyzer.analyzeZip(reader.getContentInputStream());
+ OpenDocumentMetadata docInfo = analyzer.analyzeZip(is);
if (docInfo != null)
{
@@ -84,12 +83,12 @@ public class OpenDocumentMetadataExtracter extends AbstractMetadataExtracter
destination.put(ContentModel.PROP_CREATED, docInfo.getCreationDate());
}
}
- catch (Throwable e)
+ finally
{
- String message = "Metadata extraction failed: \n" +
- " reader: " + reader;
- logger.debug(message, e);
- throw new ContentIOException(message, e);
+ if (is != null)
+ {
+ try { is.close(); } catch (IOException e) {}
+ }
}
}
}
\ No newline at end of file
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
index c34f0b81c3..e335c6cf83 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -17,17 +17,15 @@
package org.alfresco.repo.content.metadata;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
@@ -37,26 +35,20 @@ import org.pdfbox.pdmodel.PDDocumentInformation;
*/
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
{
-
- private static final Log logger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
-
public PdfBoxMetadataExtracter()
{
super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
}
- public void extract(ContentReader reader, Map destination) throws ContentIOException
+ public void extractInternal(ContentReader reader, Map destination) throws Throwable
{
- if (!MimetypeMap.MIMETYPE_PDF.equals(reader.getMimetype()))
- {
- logger.debug("No metadata extracted for " + reader.getMimetype());
- return;
- }
PDDocument pdf = null;
+ InputStream is = null;
try
{
+ is = reader.getContentInputStream();
// stream the document in
- pdf = PDDocument.load(reader.getContentInputStream());
+ pdf = PDDocument.load(is);
// Scoop out the metadata
PDDocumentInformation docInfo = pdf.getDocumentInformation();
@@ -68,23 +60,15 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
if (created != null)
destination.put(ContentModel.PROP_CREATED, created.getTime());
}
- catch (IOException e)
- {
- throw new ContentIOException("PDF metadata extraction failed: \n" +
- " reader: " + reader);
- }
finally
{
+ if (is != null)
+ {
+ try { is.close(); } catch (IOException e) {}
+ }
if (pdf != null)
{
- try
- {
- pdf.close();
- }
- catch (Throwable e)
- {
- e.printStackTrace();
- }
+ try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
}
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
index f218508d22..ddb3dd91cf 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -1,16 +1,14 @@
package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
/**
- * @see org.alfresco.repo.content.transform.PdfBoxContentTransformer
+ * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
+ *
* @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{
- private static final Log logger = LogFactory.getLog(PdfBoxMetadataExtracterTest.class);
private MetadataExtracter extracter;
public void onSetUpInTransaction() throws Exception
diff --git a/source/java/org/alfresco/repo/content/metadata/StringMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/StringMetadataExtracter.java
deleted file mode 100644
index 29cba14764..0000000000
--- a/source/java/org/alfresco/repo/content/metadata/StringMetadataExtracter.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2005 Jesper Steen Møller
- *
- * Licensed under the Mozilla Public License version 1.1
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- * http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import org.alfresco.service.cmr.repository.ContentIOException;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-/**
- *
- * @author Jesper Steen Møller
- */
-public class StringMetadataExtracter implements MetadataExtracter
-{
- public static final String PREFIX_TEXT = "text/";
-
- private static final Log logger = LogFactory.getLog(StringMetadataExtracter.class);
-
- public double getReliability(String sourceMimetype)
- {
- if (sourceMimetype.startsWith(PREFIX_TEXT))
- return 0.1;
- else
- return 0.0;
- }
-
- public long getExtractionTime()
- {
- return 1000;
- }
-
- public void extract(ContentReader reader, Map destination) throws ContentIOException
- {
- if (logger.isDebugEnabled())
- {
- logger.debug("No metadata extracted for " + reader.getMimetype());
- }
- }
-}
diff --git a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
index 4bdc69ddd0..5364f668b2 100644
--- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
@@ -28,12 +28,9 @@ import net.sf.joott.uno.UnoConnection;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.TempFileProvider;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import com.sun.star.beans.PropertyValue;
import com.sun.star.beans.XPropertySet;
@@ -49,9 +46,6 @@ import com.sun.star.uno.UnoRuntime;
*/
public class UnoMetadataExtracter extends AbstractMetadataExtracter
{
-
- private static final Log logger = LogFactory.getLog(UnoMetadataExtracter.class);
-
private static String[] mimeTypes = new String[] {
MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
@@ -60,33 +54,44 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// quality since they involve conversion.
};
- public UnoMetadataExtracter(MimetypeMap mimetypeMap, String connectionUrl)
- {
- super(new HashSet(Arrays.asList(mimeTypes)), 1.00, 10000);
- this.mimetypeMap = mimetypeMap;
- init(connectionUrl);
- }
-
- public UnoMetadataExtracter(MimetypeMap mimetypeMap)
- {
- this(mimetypeMap, UnoConnection.DEFAULT_CONNECTION_STRING);
- }
-
private MimetypeMap mimetypeMap;
+ private String contentUrl;
private MyUnoConnection connection;
private boolean isConnected;
- /**
- * @param unoConnectionUrl the URL of the Uno server
- */
- private synchronized void init(String unoConnectionUrl)
+ public UnoMetadataExtracter()
{
- connection = new MyUnoConnection(unoConnectionUrl);
+ super(new HashSet(Arrays.asList(mimeTypes)), 1.00, 10000);
+ this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
+ }
+
+ public void setMimetypeMap(MimetypeMap mimetypeMap)
+ {
+ this.mimetypeMap = mimetypeMap;
+ }
+
+ /**
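+ * Sets the connection URL of the UNO server (this is not a content URL, despite the name).
+ * @param contentUrl the URL of the UNO server to connect to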
+ */
+ public void setContentUrl(String contentUrl)
+ {
+ this.contentUrl = contentUrl;
+ }
+
+ /**
+ * Initialises the bean by establishing a UNO connection and, on success, registering this extracter
+ */
+ public synchronized void init()
+ {
+ connection = new MyUnoConnection(contentUrl);
// attempt to make an connection
try
{
connection.connect();
isConnected = true;
+ // register
+ super.register();
}
catch (ConnectException e)
{
@@ -103,66 +108,58 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
return isConnected;
}
- public void extract(ContentReader reader, final Map destination) throws ContentIOException
+ public void extractInternal(ContentReader reader, final Map destination) throws Throwable
{
String sourceMimetype = reader.getMimetype();
// create temporary files to convert from and to
- File tempFromFile = TempFileProvider.createTempFile("UnoContentTransformer", "."
+ File tempFromFile = TempFileProvider.createTempFile(
+ "UnoContentTransformer_", "."
+ mimetypeMap.getExtension(sourceMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);
- String sourceUrl = tempFromFile.toString();
- try
- {
- sourceUrl = toUrl(tempFromFile, connection);
- // UNO Interprocess Bridge *should* be thread-safe, but...
- synchronized (connection)
+ String sourceUrl = toUrl(tempFromFile, connection);
+
+ // UNO Interprocess Bridge *should* be thread-safe, but...
+ synchronized (connection)
+ {
+ XComponentLoader desktop = connection.getDesktop();
+ XComponent document = desktop.loadComponentFromURL(
+ sourceUrl,
+ "_blank",
+ 0,
+ new PropertyValue[] { property("Hidden", Boolean.TRUE) });
+ if (document == null)
{
- XComponentLoader desktop = connection.getDesktop();
- XComponent document = desktop.loadComponentFromURL(
- sourceUrl,
- "_blank",
- 0,
- new PropertyValue[] { property("Hidden", Boolean.TRUE) });
- if (document == null)
- {
- throw new FileNotFoundException("could not open source document: " + sourceUrl);
- }
- try
- {
- XDocumentInfoSupplier infoSupplier = (XDocumentInfoSupplier) UnoRuntime.queryInterface(
- XDocumentInfoSupplier.class, document);
- XPropertySet propSet = (XPropertySet) UnoRuntime.queryInterface(
- XPropertySet.class,
- infoSupplier
- .getDocumentInfo());
-
- // Titled aspect
- trimPut(ContentModel.PROP_TITLE, propSet.getPropertyValue("Title"), destination);
- trimPut(ContentModel.PROP_DESCRIPTION, propSet.getPropertyValue("Subject"), destination);
-
- // Auditable aspect
- // trimPut(ContentModel.PROP_CREATED,
- // si.getCreateDateTime(), destination);
- trimPut(ContentModel.PROP_AUTHOR, propSet.getPropertyValue("Author"), destination);
- // trimPut(ContentModel.PROP_MODIFIED,
- // si.getLastSaveDateTime(), destination);
- // trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(),
- // destination);
- }
- finally
- {
- document.dispose();
- }
+ throw new FileNotFoundException("could not open source document: " + sourceUrl);
+ }
+ try
+ {
+ XDocumentInfoSupplier infoSupplier = (XDocumentInfoSupplier) UnoRuntime.queryInterface(
+ XDocumentInfoSupplier.class, document);
+ XPropertySet propSet = (XPropertySet) UnoRuntime.queryInterface(
+ XPropertySet.class,
+ infoSupplier
+ .getDocumentInfo());
+
+ // Titled aspect
+ trimPut(ContentModel.PROP_TITLE, propSet.getPropertyValue("Title"), destination);
+ trimPut(ContentModel.PROP_DESCRIPTION, propSet.getPropertyValue("Subject"), destination);
+
+ // Auditable aspect
+ // trimPut(ContentModel.PROP_CREATED,
+ // si.getCreateDateTime(), destination);
+ trimPut(ContentModel.PROP_AUTHOR, propSet.getPropertyValue("Author"), destination);
+ // trimPut(ContentModel.PROP_MODIFIED,
+ // si.getLastSaveDateTime(), destination);
+ // trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(),
+ // destination);
+ }
+ finally
+ {
+ document.dispose();
}
- }
- catch (Throwable e)
- {
- throw new ContentIOException("Conversion failed: \n" +
- " source: " + sourceUrl + "\n",
- e);
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
index 3676728cc9..474d9f7700 100644
--- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
@@ -19,7 +19,6 @@ package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
/**
- * @see org.alfresco.repo.content.transform.UnoMetadataExtracter
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
@@ -28,7 +27,8 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
public void onSetUpInTransaction() throws Exception
{
- extracter = new UnoMetadataExtracter(mimetypeMap);
+ extracter = new UnoMetadataExtracter();
+ extracter.setMimetypeMap(mimetypeMap);
}
/**