From 8288d99e9887d40a15354f0651c80ff9053c2799 Mon Sep 17 00:00:00 2001 From: Derek Hulley Date: Fri, 13 Jul 2007 15:35:58 +0000 Subject: [PATCH] Final fix for AR-357: Metadata extractors are configurable git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6246 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../metadata/HtmlMetadataExtracter.java | 36 +++-- .../metadata/HtmlMetadataExtracter.properties | 12 ++ .../metadata/MP3MetadataExtracter.java | 144 ++++++++---------- .../metadata/MP3MetadataExtracter.properties | 20 +++ .../metadata/MailMetadataExtracter.java | 48 ++++-- .../metadata/MailMetadataExtracter.properties | 14 ++ .../metadata/OpenOfficeMetadataExtracter.java | 42 ++--- .../OpenOfficeMetadataExtracter.properties | 12 ++ 8 files changed, 199 insertions(+), 129 deletions(-) create mode 100644 source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.properties create mode 100644 source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties create mode 100644 source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.properties create mode 100644 source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.properties diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java index b736eeebb3..83fa378f27 100644 --- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java @@ -28,7 +28,6 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.Serializable; -import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -39,17 +38,26 @@ import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; -import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; /** + * Extracts the following values from HTML documents: + *
+ *   author:                 --      cm:author
+ *   title:                  --      cm:title
+ *   description:            --      cm:description
+ * 
* * @author Jesper Steen Møller + * @author Derek Hulley */ -public class HtmlMetadataExtracter extends AbstractMetadataExtracter +public class HtmlMetadataExtracter extends AbstractMappingMetadataExtracter { + private static final String KEY_AUTHOR = "author"; + private static final String KEY_TITLE = "title"; + private static final String KEY_DESCRIPTION= "description"; + private static final Set MIMETYPES = new HashSet(5); static { @@ -59,12 +67,13 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter public HtmlMetadataExtracter() { - super(MIMETYPES, 1.0, 1000); + super(MIMETYPES); } - public void extractInternal(ContentReader reader, Map destination) throws Throwable + @Override + protected Map extractRaw(ContentReader reader) throws Throwable { - final Map tempDestination = new HashMap(); + final Map rawProperties = newRawMap(); HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() { @@ -106,7 +115,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter } else if (HTML.Tag.TITLE.equals(t) && title != null) { - trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination); + putRawValue(KEY_TITLE, title.toString(), rawProperties); title = null; } } @@ -125,11 +134,11 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author") || name.equalsIgnoreCase("dc.creator")) { - trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination); + putRawValue(KEY_AUTHOR, valueO.toString(), rawProperties); } - if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description")) + else if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description")) { - trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination); + putRawValue(KEY_DESCRIPTION, valueO.toString(), rawProperties); } } } @@ -143,7 +152,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter int tries = 0; while (tries < 3) { - tempDestination.clear(); + rawProperties.clear(); Reader r = null; InputStream cis = null; try @@ -153,7 +162,6 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter r = new InputStreamReader(cis); HTMLEditorKit.Parser parser = new ParserDelegator(); parser.parse(r, callback, tries > 0); - destination.putAll(tempDestination); break; } catch (ChangedCharSetException ccse) @@ -173,5 +181,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter cis.close(); } } + // Done + return rawProperties; } } diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.properties new file mode 100644 index 0000000000..7a5496b2ad --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.properties @@ -0,0 +1,12 @@ +# +# HtmlMetadataExtracter - default mapping +# +# author: Derek Hulley + +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 + +# Mappings +author=cm:author +title=cm:title +description=cm:description diff --git a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java index dee0ef614b..1100d581c9 100644 --- a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java @@ -26,13 +26,12 @@ package org.alfresco.repo.content.metadata; import java.io.File; import java.io.Serializable; -import java.util.HashMap; +import java.util.Arrays; +import java.util.HashSet; import java.util.Map; -import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; import org.alfresco.util.TempFileProvider; import org.farng.mp3.AbstractMP3FragmentBody; import org.farng.mp3.MP3File; @@ -44,30 +43,46 @@ import org.farng.mp3.lyrics3.Lyrics3v2; import org.farng.mp3.lyrics3.Lyrics3v2Field; /** + * Extracts the following values from MP3 files: + *
+ *   songTitle:              --      {music}songTitle, cm:title
+ *   albumTitle:             --      {music}albumTitle
+ *   artist:                 --      {music}artist, cm:author
+ *   description:            --      cm:description
+ *   comment:                --      {music}comment
+ *   yearReleased:           --      {music}yearReleased
+ *   trackNumber:            --      {music}trackNumber
+ *   genre:                  --      {music}genre
+ *   composer:               --      {music}composer
+ *   lyrics:                 --      {music}lyrics
+ * 
+ * * @author Roy Wetherall */ -public class MP3MetadataExtracter extends AbstractMetadataExtracter +public class MP3MetadataExtracter extends AbstractMappingMetadataExtracter { - private static final QName PROP_ALBUM_TITLE = QName.createQName("{music}albumTitle"); - private static final QName PROP_SONG_TITLE = QName.createQName("{music}songTitle");; - private static final QName PROP_ARTIST = QName.createQName("{music}artist");; - private static final QName PROP_COMMENT = QName.createQName("{music}comment");; - private static final QName PROP_YEAR_RELEASED = QName.createQName("{music}yearReleased");; - private static final QName PROP_TRACK_NUMBER = QName.createQName("{music}trackNumber");; - private static final QName PROP_GENRE = QName.createQName("{music}genre");; - private static final QName PROP_COMPOSER = QName.createQName("{music}composer");; - private static final QName PROP_LYRICS = QName.createQName("{music}lyrics");; + private static final String KEY_SONG_TITLE = "songTitle"; + private static final String KEY_ALBUM_TITLE = "albumTitle"; + private static final String KEY_ARTIST = "artist"; + private static final String KEY_DESCRIPTION = "description"; + private static final String KEY_COMMENT = "comment"; + private static final String KEY_YEAR_RELEASED = "yearReleased"; + private static final String KEY_TRACK_NUMBER = "trackNumber"; + private static final String KEY_GENRE = "genre"; + private static final String KEY_COMPOSER = "composer"; + private static final String KEY_LYRICS = "lyrics"; + public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_MP3 }; + public MP3MetadataExtracter() { - super(MimetypeMap.MIMETYPE_MP3, 1.0, 1000); + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES))); } - public void extractInternal( - ContentReader reader, - Map destination) throws Throwable + @Override + public Map extractRaw(ContentReader reader) throws Throwable { - Map props = new HashMap(); + Map rawProperties = newRawMap(); // Create a temp file File tempFile = TempFileProvider.createTempFile("MP3MetadataExtracter_", ".tmp"); @@ -81,30 +96,30 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter ID3v1 id3v1 = mp3File.getID3v1Tag(); if (id3v1 != null) { - setTagValue(props, PROP_ALBUM_TITLE, id3v1.getAlbum()); - setTagValue(props, PROP_SONG_TITLE, id3v1.getTitle()); - setTagValue(props, PROP_ARTIST, id3v1.getArtist()); - setTagValue(props, PROP_COMMENT, id3v1.getComment()); - setTagValue(props, PROP_YEAR_RELEASED, id3v1.getYear()); + putRawValue(KEY_ALBUM_TITLE, id3v1.getAlbum(), rawProperties); + putRawValue(KEY_SONG_TITLE, id3v1.getTitle(), rawProperties); + putRawValue(KEY_ARTIST, id3v1.getArtist(), rawProperties); + putRawValue(KEY_COMMENT, id3v1.getComment(), rawProperties); + putRawValue(KEY_YEAR_RELEASED, id3v1.getYear(), rawProperties); // TODO sort out the genre - //setTagValue(props, MusicModel.PROP_GENRE, id3v1.getGenre()); + //putRawValue(MusicModel.KEY_GENRE, id3v1.getGenre()); // TODO sort out the size - //setTagValue(props, MusicModel.PROP_SIZE, id3v1.getSize()); + //putRawValue(MusicModel.KEY_SIZE, id3v1.getSize()); } AbstractID3v2 id3v2 = mp3File.getID3v2Tag(); if (id3v2 != null) { - setTagValue(props, PROP_SONG_TITLE, getID3V2Value(id3v2, "TIT2")); - setTagValue(props, PROP_ARTIST, getID3V2Value(id3v2, "TPE1")); - setTagValue(props, PROP_ALBUM_TITLE, getID3V2Value(id3v2, "TALB")); - setTagValue(props, PROP_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC")); - setTagValue(props, PROP_COMMENT, getID3V2Value(id3v2, "COMM")); - setTagValue(props, PROP_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK")); - setTagValue(props, PROP_GENRE, getID3V2Value(id3v2, "TCON")); - setTagValue(props, PROP_COMPOSER, getID3V2Value(id3v2, "TCOM")); + putRawValue(KEY_SONG_TITLE, getID3V2Value(id3v2, "TIT2"), rawProperties); + putRawValue(KEY_ARTIST, getID3V2Value(id3v2, "TPE1"), rawProperties); + putRawValue(KEY_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"), rawProperties); + putRawValue(KEY_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"), rawProperties); + putRawValue(KEY_COMMENT, getID3V2Value(id3v2, "COMM"), rawProperties); + putRawValue(KEY_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"), rawProperties); + putRawValue(KEY_GENRE, getID3V2Value(id3v2, "TCON"), rawProperties); + putRawValue(KEY_COMPOSER, getID3V2Value(id3v2, "TCOM"), rawProperties); // TODO sort out the lyrics //System.out.println("Lyrics: " + getID3V2Value(id3v2, "SYLT")); @@ -117,12 +132,12 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter System.out.println("Lyrics3 tag found."); if (lyrics3Tag instanceof Lyrics3v2) { - setTagValue(props, PROP_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2")); - setTagValue(props, PROP_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1")); - setTagValue(props, PROP_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB")); - setTagValue(props, PROP_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM")); - setTagValue(props, PROP_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT")); - setTagValue(props, PROP_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM")); + putRawValue(KEY_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"), rawProperties); + putRawValue(KEY_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"), rawProperties); + putRawValue(KEY_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"), rawProperties); + putRawValue(KEY_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"), rawProperties); + putRawValue(KEY_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"), rawProperties); + putRawValue(KEY_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"), rawProperties); } } @@ -132,20 +147,14 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter tempFile.delete(); } - // Set the destination values - if (props.get(PROP_SONG_TITLE) != null) - { - destination.put(ContentModel.PROP_TITLE, props.get(PROP_SONG_TITLE)); - } - if (props.get(PROP_ARTIST) != null) - { - destination.put(ContentModel.PROP_AUTHOR, props.get(PROP_ARTIST)); - } - String description = getDescription(props); + String description = getDescription(rawProperties); if (description != null) { - destination.put(ContentModel.PROP_DESCRIPTION, description); + putRawValue(KEY_DESCRIPTION, description, rawProperties); } + + // Done + return rawProperties; } @@ -155,17 +164,17 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter * @param props the properties extracted from the file * @return the description */ - private String getDescription(Map props) + private String getDescription(Map props) { StringBuilder result = new StringBuilder(); - if (props.get(PROP_SONG_TITLE) != null && props.get(PROP_ARTIST) != null && props.get(PROP_ALBUM_TITLE) != null) + if (props.get(KEY_SONG_TITLE) != null && props.get(KEY_ARTIST) != null && props.get(KEY_ALBUM_TITLE) != null) { result - .append(props.get(PROP_SONG_TITLE)) + .append(props.get(KEY_SONG_TITLE)) .append(" - ") - .append(props.get(PROP_ALBUM_TITLE)) + .append(props.get(KEY_ALBUM_TITLE)) .append(" (") - .append(props.get(PROP_ARTIST)) + .append(props.get(KEY_ARTIST)) .append(")"); } @@ -173,26 +182,6 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter return result.toString(); } - /** - * - * @param props - * @param propQName - * @param propvalue - */ - private void setTagValue(Map props, QName propQName, String propvalue) - { - if (propvalue != null && propvalue.length() != 0) - { - trimPut(propQName, propvalue, props); - } - } - - /** - * - * @param lyrics3Tag - * @param name - * @return - */ private String getLyrics3v2Value(Lyrics3v2 lyrics3Tag, String name) { String result = ""; @@ -210,10 +199,6 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter /** * Get the ID3V2 tag value in a safe way - * - * @param id3v2 - * @param name - * @return */ private String getID3V2Value(AbstractID3v2 id3v2, String name) { @@ -231,5 +216,4 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter return result; } - } diff --git a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties new file mode 100644 index 0000000000..ec737c7948 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.properties @@ -0,0 +1,20 @@ +# +# MP3MetadataExtracter - default mapping +# +# author: Derek Hulley + +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 +namespace.prefix.music=music + +# Mappings +songTitle=music:songTitle, cm:title +albumTitle=music:albumTitle +artist=music:artist, cm:author +description=cm:description +comment=music:comment +yearReleased=music:yearReleased +trackNumber=music:trackNumber +genre=music:genre +composer=music:composer +lyrics=music:lyrics \ No newline at end of file diff --git a/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java index c49d6d3bbf..19b3d63cfc 100644 --- a/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java @@ -30,29 +30,39 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; -import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; -import org.alfresco.model.ContentModel; import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; import org.apache.poi.poifs.eventfilesystem.POIFSReader; import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; import org.apache.poi.poifs.filesystem.DocumentInputStream; /** - * Outlook format email meta-data extractor + * Outlook format email meta-data extractor extracting the following values: + *
+ *   sentDate:               --      cm:sentdate
+ *   originator:             --      cm:originator,    cm:author
+ *   addressee:              --      cm:addressee
+ *   addressees:             --      cm:addressees
+ *   subjectLine:            --      cm:subjectline,   cm:description
+ * 
* + * @since 2.1 * @author Kevin Roast */ -public class MailMetadataExtracter extends AbstractMetadataExtracter +public class MailMetadataExtracter extends AbstractMappingMetadataExtracter { - public static String[] SUPPORTED_MIMETYPES = new String[] { - "message/rfc822"}; + private static final String KEY_SENT_DATE = "sentDate"; + private static final String KEY_ORIGINATOR = "originator"; + private static final String KEY_ADDRESSEE = "addressee"; + private static final String KEY_ADDRESSEES = "addressees"; + private static final String KEY_SUBJECT = "subjectLine"; + + public static String[] SUPPORTED_MIMETYPES = new String[] {"message/rfc822"}; private static final String STREAM_PREFIX = "__substg1.0_"; private static final int STREAM_PREFIX_LENGTH = STREAM_PREFIX.length(); @@ -62,11 +72,14 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter public MailMetadataExtracter() { - super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000); + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES))); } - public void extractInternal(ContentReader reader, final Map destination) throws Throwable + @Override + public Map extractRaw(ContentReader reader) throws Throwable { + final Map rawProperties = newRawMap(); + POIFSReaderListener readerListener = new POIFSReaderListener() { public void processPOIFSReaderEvent(final POIFSReaderEvent event) @@ -76,7 +89,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter if (event.getName().startsWith(STREAM_PREFIX)) { StreamHandler handler = new StreamHandler(event.getName(), event.getStream()); - handler.process(destination); + handler.process(rawProperties); } } catch (Exception ex) @@ -109,7 +122,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter // store multi-value extracted property if (this.receipientEmails.get().size() != 0) { - destination.put(ContentModel.PROP_ADDRESSEES, (Serializable)receipientEmails.get()); + putRawValue(KEY_ADDRESSEES, (Serializable)receipientEmails.get(), rawProperties); } } finally @@ -119,6 +132,8 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter try { is.close(); } catch (IOException e) {} } } + // Done + return rawProperties; } private static String convertExchangeAddress(String email) @@ -138,6 +153,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter private static final String ENCODING_BINARY = "0102"; private static final String ENCODING_UNICODE = "001F"; + @SuppressWarnings("unused") private static final String SUBSTG_MESSAGEBODY = "1000"; private static final String SUBSTG_RECIPIENTEMAIL = "39FE"; // 7bit email address private static final String SUBSTG_RECIPIENTSEARCH = "300B"; // address 'search' variant @@ -158,12 +174,12 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter this.stream = stream; } - void process(final Map destination) + void process(final Map destination) throws IOException { if (type.equals(SUBSTG_SENDEREMAIL)) { - destination.put(ContentModel.PROP_ORIGINATOR, convertExchangeAddress(extractText())); + putRawValue(KEY_ORIGINATOR, convertExchangeAddress(extractText()), destination); } else if (type.equals(SUBSTG_RECIPIENTEMAIL)) { @@ -192,11 +208,11 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter } else if (type.equals(SUBSTG_RECEIVEDEMAIL)) { - destination.put(ContentModel.PROP_ADDRESSEE, convertExchangeAddress(extractText())); + putRawValue(KEY_ADDRESSEE, convertExchangeAddress(extractText()), destination); } else if (type.equals(SUBSTG_SUBJECT)) { - destination.put(ContentModel.PROP_SUBJECT, extractText()); + putRawValue(KEY_SUBJECT, extractText(), destination); } else if (type.equals(SUBSTG_DATE)) { @@ -221,7 +237,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter String strMinute = date.substring(dateIndex + 10, dateIndex + 12); c.set(Calendar.MINUTE, Integer.parseInt(strMinute)); c.set(Calendar.SECOND, 0); - destination.put(ContentModel.PROP_SENTDATE, c.getTime()); + putRawValue(KEY_SENT_DATE, c.getTime(), destination); } } } diff --git a/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.properties new file mode 100644 index 0000000000..514fa1fc05 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.properties @@ -0,0 +1,14 @@ +# +# MailMetadataExtracter - default mapping +# +# author: Derek Hulley + +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 + +# Mappings +sentDate=cm:sentdate +originator=cm:originator, cm:author +addressee=cm:addressee +addressees=cm:addressees +subjectLine=cm:subjectline, cm:description \ No newline at end of file diff --git a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java index 4730786b4e..f5127cf1ba 100644 --- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java @@ -32,10 +32,8 @@ import java.util.Map; import net.sf.jooreports.openoffice.connection.OpenOfficeConnection; -import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; import org.alfresco.util.PropertyCheck; import org.alfresco.util.TempFileProvider; @@ -48,25 +46,33 @@ import com.sun.star.ucb.XFileIdentifierConverter; import com.sun.star.uno.UnoRuntime; /** + * Extracts values from Star Office documents into the following: + *
+ *   author:                 --      cm:author
+ *   title:                  --      cm:title
+ *   description:            --      cm:description
+ * 
+ * * @author Jesper Steen Møller */ -public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter +public class OpenOfficeMetadataExtracter extends AbstractMappingMetadataExtracter { + private static final String KEY_AUTHOR = "author"; + private static final String KEY_TITLE = "title"; + private static final String KEY_DESCRIPTION = "description"; + public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_STAROFFICE5_WRITER, MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS, MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS - // Add the other OpenOffice.org stuff here - // In fact, other types may apply as well, but should be counted as lower - // quality since they involve conversion. }; private OpenOfficeConnection connection; public OpenOfficeMetadataExtracter() { - super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000); + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES))); } public void setConnection(OpenOfficeConnection connection) @@ -119,8 +125,11 @@ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter return connection.isConnected(); } - public void extractInternal(ContentReader reader, final Map destination) throws Throwable + @Override + public Map extractRaw(ContentReader reader) throws Throwable { + Map rawProperties = newRawMap(); + String sourceMimetype = reader.getMimetype(); // create temporary files to convert from and to @@ -154,24 +163,17 @@ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter infoSupplier .getDocumentInfo()); - // Titled aspect - trimPut(ContentModel.PROP_TITLE, propSet.getPropertyValue("Title"), destination); - trimPut(ContentModel.PROP_DESCRIPTION, propSet.getPropertyValue("Subject"), destination); - - // Auditable aspect - // trimPut(ContentModel.PROP_CREATED, - // si.getCreateDateTime(), destination); - trimPut(ContentModel.PROP_AUTHOR, propSet.getPropertyValue("Author"), destination); - // trimPut(ContentModel.PROP_MODIFIED, - // si.getLastSaveDateTime(), destination); - // trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(), - // destination); + putRawValue(KEY_TITLE, propSet.getPropertyValue("Title").toString(), rawProperties); + putRawValue(KEY_DESCRIPTION, propSet.getPropertyValue("Subject").toString(), rawProperties); + putRawValue(KEY_AUTHOR, propSet.getPropertyValue("Author").toString(), rawProperties); } finally { document.dispose(); } } + // Done + return rawProperties; } public String toUrl(File file, OpenOfficeConnection connection) throws ConnectException diff --git a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.properties new file mode 100644 index 0000000000..dc8e947e21 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.properties @@ -0,0 +1,12 @@ +# +# OpenOfficeMetadataExtracter - default mapping +# +# author: Derek Hulley + +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 + +# Mappings +author=cm:author +title=cm:title +description=cm:description