Final fix for AR-357: Metadata extractors are configurable

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6246 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2007-07-13 15:35:58 +00:00
parent 44f6b94cff
commit 8288d99e98
8 changed files with 199 additions and 129 deletions

View File

@@ -28,7 +28,6 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@@ -39,17 +38,26 @@ import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
/**
* Extracts the following values from HTML documents:
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>description:</b> -- cm:description
* </pre>
*
* @author Jesper Steen Møller
* @author Derek Hulley
*/
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
public class HtmlMetadataExtracter extends AbstractMappingMetadataExtracter
{
private static final String KEY_AUTHOR = "author";
private static final String KEY_TITLE = "title";
private static final String KEY_DESCRIPTION= "description";
private static final Set<String> MIMETYPES = new HashSet<String>(5);
static
{
@@ -59,12 +67,13 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
public HtmlMetadataExtracter()
{
super(MIMETYPES, 1.0, 1000);
super(MIMETYPES);
}
public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
@Override
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
final Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
final Map<String, Serializable> rawProperties = newRawMap();
HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
{
@@ -106,7 +115,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
}
else if (HTML.Tag.TITLE.equals(t) && title != null)
{
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
putRawValue(KEY_TITLE, title.toString(), rawProperties);
title = null;
}
}
@@ -125,11 +134,11 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
|| name.equalsIgnoreCase("dc.creator"))
{
trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
putRawValue(KEY_AUTHOR, valueO.toString(), rawProperties);
}
if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
else if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
{
trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
putRawValue(KEY_DESCRIPTION, valueO.toString(), rawProperties);
}
}
}
@@ -143,7 +152,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
int tries = 0;
while (tries < 3)
{
tempDestination.clear();
rawProperties.clear();
Reader r = null;
InputStream cis = null;
try
@@ -153,7 +162,6 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
r = new InputStreamReader(cis);
HTMLEditorKit.Parser parser = new ParserDelegator();
parser.parse(r, callback, tries > 0);
destination.putAll(tempDestination);
break;
}
catch (ChangedCharSetException ccse)
@@ -173,5 +181,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
cis.close();
}
}
// Done
return rawProperties;
}
}

View File

@@ -0,0 +1,12 @@
#
# HtmlMetadataExtracter - default mapping
#
# Maps the raw keys emitted by HtmlMetadataExtracter to the qualified
# property names that the values should be written to.
#
# Raw keys produced by the extracter:
#   author      - <meta> tags named "creator", "author" or "dc.creator"
#   title       - text of the <title> element
#   description - <meta> tags named "description" or "dc.description"
#
# author: Derek Hulley
# Namespaces
# Each "namespace.prefix.<prefix>" entry declares a prefix usable in the mappings below.
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
# Format: <raw key>=<prefix>:<local name>
author=cm:author
title=cm:title
description=cm:description

View File

@@ -26,13 +26,12 @@ package org.alfresco.repo.content.metadata;
import java.io.File;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.TempFileProvider;
import org.farng.mp3.AbstractMP3FragmentBody;
import org.farng.mp3.MP3File;
@@ -44,30 +43,46 @@ import org.farng.mp3.lyrics3.Lyrics3v2;
import org.farng.mp3.lyrics3.Lyrics3v2Field;
/**
* Extracts the following values from MP3 files:
* <pre>
* <b>songTitle:</b> -- {music}songTitle, cm:title
* <b>albumTitle:</b> -- {music}albumTitle
* <b>artist:</b> -- {music}artist, cm:author
* <b>description:</b> -- cm:description
* <b>comment:</b> -- {music}comment
* <b>yearReleased:</b> -- {music}yearReleased
* <b>trackNumber:</b> -- {music}trackNumber
* <b>genre:</b> -- {music}genre
* <b>composer:</b> -- {music}composer
* <b>lyrics:</b> -- {music}lyrics
* </pre>
*
* @author Roy Wetherall
*/
public class MP3MetadataExtracter extends AbstractMetadataExtracter
public class MP3MetadataExtracter extends AbstractMappingMetadataExtracter
{
private static final QName PROP_ALBUM_TITLE = QName.createQName("{music}albumTitle");
private static final QName PROP_SONG_TITLE = QName.createQName("{music}songTitle");;
private static final QName PROP_ARTIST = QName.createQName("{music}artist");;
private static final QName PROP_COMMENT = QName.createQName("{music}comment");;
private static final QName PROP_YEAR_RELEASED = QName.createQName("{music}yearReleased");;
private static final QName PROP_TRACK_NUMBER = QName.createQName("{music}trackNumber");;
private static final QName PROP_GENRE = QName.createQName("{music}genre");;
private static final QName PROP_COMPOSER = QName.createQName("{music}composer");;
private static final QName PROP_LYRICS = QName.createQName("{music}lyrics");;
private static final String KEY_SONG_TITLE = "songTitle";
private static final String KEY_ALBUM_TITLE = "albumTitle";
private static final String KEY_ARTIST = "artist";
private static final String KEY_DESCRIPTION = "description";
private static final String KEY_COMMENT = "comment";
private static final String KEY_YEAR_RELEASED = "yearReleased";
private static final String KEY_TRACK_NUMBER = "trackNumber";
private static final String KEY_GENRE = "genre";
private static final String KEY_COMPOSER = "composer";
private static final String KEY_LYRICS = "lyrics";
public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_MP3 };
public MP3MetadataExtracter()
{
super(MimetypeMap.MIMETYPE_MP3, 1.0, 1000);
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
}
public void extractInternal(
ContentReader reader,
Map<QName, Serializable> destination) throws Throwable
@Override
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
Map<QName, Serializable> props = new HashMap<QName, Serializable>();
Map<String, Serializable> rawProperties = newRawMap();
// Create a temp file
File tempFile = TempFileProvider.createTempFile("MP3MetadataExtracter_", ".tmp");
@@ -81,30 +96,30 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
ID3v1 id3v1 = mp3File.getID3v1Tag();
if (id3v1 != null)
{
setTagValue(props, PROP_ALBUM_TITLE, id3v1.getAlbum());
setTagValue(props, PROP_SONG_TITLE, id3v1.getTitle());
setTagValue(props, PROP_ARTIST, id3v1.getArtist());
setTagValue(props, PROP_COMMENT, id3v1.getComment());
setTagValue(props, PROP_YEAR_RELEASED, id3v1.getYear());
putRawValue(KEY_ALBUM_TITLE, id3v1.getAlbum(), rawProperties);
putRawValue(KEY_SONG_TITLE, id3v1.getTitle(), rawProperties);
putRawValue(KEY_ARTIST, id3v1.getArtist(), rawProperties);
putRawValue(KEY_COMMENT, id3v1.getComment(), rawProperties);
putRawValue(KEY_YEAR_RELEASED, id3v1.getYear(), rawProperties);
// TODO sort out the genre
//setTagValue(props, MusicModel.PROP_GENRE, id3v1.getGenre());
//putRawValue(MusicModel.KEY_GENRE, id3v1.getGenre());
// TODO sort out the size
//setTagValue(props, MusicModel.PROP_SIZE, id3v1.getSize());
//putRawValue(MusicModel.KEY_SIZE, id3v1.getSize());
}
AbstractID3v2 id3v2 = mp3File.getID3v2Tag();
if (id3v2 != null)
{
setTagValue(props, PROP_SONG_TITLE, getID3V2Value(id3v2, "TIT2"));
setTagValue(props, PROP_ARTIST, getID3V2Value(id3v2, "TPE1"));
setTagValue(props, PROP_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"));
setTagValue(props, PROP_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"));
setTagValue(props, PROP_COMMENT, getID3V2Value(id3v2, "COMM"));
setTagValue(props, PROP_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"));
setTagValue(props, PROP_GENRE, getID3V2Value(id3v2, "TCON"));
setTagValue(props, PROP_COMPOSER, getID3V2Value(id3v2, "TCOM"));
putRawValue(KEY_SONG_TITLE, getID3V2Value(id3v2, "TIT2"), rawProperties);
putRawValue(KEY_ARTIST, getID3V2Value(id3v2, "TPE1"), rawProperties);
putRawValue(KEY_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"), rawProperties);
putRawValue(KEY_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"), rawProperties);
putRawValue(KEY_COMMENT, getID3V2Value(id3v2, "COMM"), rawProperties);
putRawValue(KEY_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"), rawProperties);
putRawValue(KEY_GENRE, getID3V2Value(id3v2, "TCON"), rawProperties);
putRawValue(KEY_COMPOSER, getID3V2Value(id3v2, "TCOM"), rawProperties);
// TODO sort out the lyrics
//System.out.println("Lyrics: " + getID3V2Value(id3v2, "SYLT"));
@@ -117,12 +132,12 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
System.out.println("Lyrics3 tag found.");
if (lyrics3Tag instanceof Lyrics3v2)
{
setTagValue(props, PROP_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"));
setTagValue(props, PROP_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"));
setTagValue(props, PROP_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"));
setTagValue(props, PROP_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"));
setTagValue(props, PROP_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"));
setTagValue(props, PROP_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"));
putRawValue(KEY_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"), rawProperties);
putRawValue(KEY_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"), rawProperties);
putRawValue(KEY_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"), rawProperties);
putRawValue(KEY_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"), rawProperties);
putRawValue(KEY_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"), rawProperties);
putRawValue(KEY_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"), rawProperties);
}
}
@@ -132,20 +147,14 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
tempFile.delete();
}
// Set the destination values
if (props.get(PROP_SONG_TITLE) != null)
{
destination.put(ContentModel.PROP_TITLE, props.get(PROP_SONG_TITLE));
}
if (props.get(PROP_ARTIST) != null)
{
destination.put(ContentModel.PROP_AUTHOR, props.get(PROP_ARTIST));
}
String description = getDescription(props);
String description = getDescription(rawProperties);
if (description != null)
{
destination.put(ContentModel.PROP_DESCRIPTION, description);
putRawValue(KEY_DESCRIPTION, description, rawProperties);
}
// Done
return rawProperties;
}
@@ -155,17 +164,17 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
* @param props the properties extracted from the file
* @return the description
*/
private String getDescription(Map<QName, Serializable> props)
private String getDescription(Map<String, Serializable> props)
{
StringBuilder result = new StringBuilder();
if (props.get(PROP_SONG_TITLE) != null && props.get(PROP_ARTIST) != null && props.get(PROP_ALBUM_TITLE) != null)
if (props.get(KEY_SONG_TITLE) != null && props.get(KEY_ARTIST) != null && props.get(KEY_ALBUM_TITLE) != null)
{
result
.append(props.get(PROP_SONG_TITLE))
.append(props.get(KEY_SONG_TITLE))
.append(" - ")
.append(props.get(PROP_ALBUM_TITLE))
.append(props.get(KEY_ALBUM_TITLE))
.append(" (")
.append(props.get(PROP_ARTIST))
.append(props.get(KEY_ARTIST))
.append(")");
}
@@ -173,26 +182,6 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
return result.toString();
}
/**
*
* @param props
* @param propQName
* @param propvalue
*/
private void setTagValue(Map<QName, Serializable> props, QName propQName, String propvalue)
{
if (propvalue != null && propvalue.length() != 0)
{
trimPut(propQName, propvalue, props);
}
}
/**
*
* @param lyrics3Tag
* @param name
* @return
*/
private String getLyrics3v2Value(Lyrics3v2 lyrics3Tag, String name)
{
String result = "";
@@ -210,10 +199,6 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
/**
* Get the ID3V2 tag value in a safe way
*
* @param id3v2
* @param name
* @return
*/
private String getID3V2Value(AbstractID3v2 id3v2, String name)
{
@@ -231,5 +216,4 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
return result;
}
}

View File

@@ -0,0 +1,20 @@
#
# MP3MetadataExtracter - default mapping
#
# Maps the raw keys emitted by MP3MetadataExtracter to the qualified
# property names that the values should be written to.
#
# The raw values are read from the ID3v1, ID3v2 and Lyrics3v2 tags of the file.
# "description" is not read from a tag: the extracter composes it as
# "<songTitle> - <albumTitle> (<artist>)" when all three values are present.
#
# author: Derek Hulley
# Namespaces
# Each "namespace.prefix.<prefix>" entry declares a prefix usable in the mappings below.
# NOTE: the "music" namespace URI is the literal string "music", matching the
# "{music}..." property names shown in the extracter's Javadoc.
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.music=music
# Mappings
# Format: <raw key>=<prefix>:<local name>[, <prefix>:<local name> ...]
# A raw key may be mapped to several properties by separating them with commas.
songTitle=music:songTitle, cm:title
albumTitle=music:albumTitle
artist=music:artist, cm:author
description=cm:description
comment=music:comment
yearReleased=music:yearReleased
trackNumber=music:trackNumber
genre=music:genre
composer=music:composer
lyrics=music:lyrics

View File

@@ -30,29 +30,39 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
/**
* Outlook format email meta-data extractor
* Outlook format email meta-data extractor. Extracts the following values:
* <pre>
* <b>sentDate:</b> -- cm:sentdate
* <b>originator:</b> -- cm:originator, cm:author
* <b>addressee:</b> -- cm:addressee
* <b>addressees:</b> -- cm:addressees
* <b>subjectLine:</b> -- cm:subjectline, cm:description
* </pre>
*
* @since 2.1
* @author Kevin Roast
*/
public class MailMetadataExtracter extends AbstractMetadataExtracter
public class MailMetadataExtracter extends AbstractMappingMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
"message/rfc822"};
private static final String KEY_SENT_DATE = "sentDate";
private static final String KEY_ORIGINATOR = "originator";
private static final String KEY_ADDRESSEE = "addressee";
private static final String KEY_ADDRESSEES = "addressees";
private static final String KEY_SUBJECT = "subjectLine";
public static String[] SUPPORTED_MIMETYPES = new String[] {"message/rfc822"};
private static final String STREAM_PREFIX = "__substg1.0_";
private static final int STREAM_PREFIX_LENGTH = STREAM_PREFIX.length();
@@ -62,11 +72,14 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
public MailMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
}
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
@Override
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
final Map<String, Serializable> rawProperties = newRawMap();
POIFSReaderListener readerListener = new POIFSReaderListener()
{
public void processPOIFSReaderEvent(final POIFSReaderEvent event)
@@ -76,7 +89,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
if (event.getName().startsWith(STREAM_PREFIX))
{
StreamHandler handler = new StreamHandler(event.getName(), event.getStream());
handler.process(destination);
handler.process(rawProperties);
}
}
catch (Exception ex)
@@ -109,7 +122,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
// store multi-value extracted property
if (this.receipientEmails.get().size() != 0)
{
destination.put(ContentModel.PROP_ADDRESSEES, (Serializable)receipientEmails.get());
putRawValue(KEY_ADDRESSEES, (Serializable)receipientEmails.get(), rawProperties);
}
}
finally
@@ -119,6 +132,8 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
try { is.close(); } catch (IOException e) {}
}
}
// Done
return rawProperties;
}
private static String convertExchangeAddress(String email)
@@ -138,6 +153,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
private static final String ENCODING_BINARY = "0102";
private static final String ENCODING_UNICODE = "001F";
@SuppressWarnings("unused")
private static final String SUBSTG_MESSAGEBODY = "1000";
private static final String SUBSTG_RECIPIENTEMAIL = "39FE"; // 7bit email address
private static final String SUBSTG_RECIPIENTSEARCH = "300B"; // address 'search' variant
@@ -158,12 +174,12 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
this.stream = stream;
}
void process(final Map<QName, Serializable> destination)
void process(final Map<String, Serializable> destination)
throws IOException
{
if (type.equals(SUBSTG_SENDEREMAIL))
{
destination.put(ContentModel.PROP_ORIGINATOR, convertExchangeAddress(extractText()));
putRawValue(KEY_ORIGINATOR, convertExchangeAddress(extractText()), destination);
}
else if (type.equals(SUBSTG_RECIPIENTEMAIL))
{
@@ -192,11 +208,11 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
}
else if (type.equals(SUBSTG_RECEIVEDEMAIL))
{
destination.put(ContentModel.PROP_ADDRESSEE, convertExchangeAddress(extractText()));
putRawValue(KEY_ADDRESSEE, convertExchangeAddress(extractText()), destination);
}
else if (type.equals(SUBSTG_SUBJECT))
{
destination.put(ContentModel.PROP_SUBJECT, extractText());
putRawValue(KEY_SUBJECT, extractText(), destination);
}
else if (type.equals(SUBSTG_DATE))
{
@@ -221,7 +237,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
c.set(Calendar.MINUTE, Integer.parseInt(strMinute));
c.set(Calendar.SECOND, 0);
destination.put(ContentModel.PROP_SENTDATE, c.getTime());
putRawValue(KEY_SENT_DATE, c.getTime(), destination);
}
}
}

View File

@@ -0,0 +1,14 @@
#
# MailMetadataExtracter - default mapping
#
# Maps the raw keys emitted by MailMetadataExtracter to the qualified
# property names that the values should be written to.
#
# Raw keys produced by the extracter:
#   sentDate    - sent date parsed from the message's date stream
#   originator  - sender email address
#   addressee   - address from the message's "received" email stream
#   addressees  - multi-valued collection of recipient email addresses
#   subjectLine - message subject
#
# author: Derek Hulley
# Namespaces
# Each "namespace.prefix.<prefix>" entry declares a prefix usable in the mappings below.
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
# Format: <raw key>=<prefix>:<local name>[, <prefix>:<local name> ...]
# A raw key may be mapped to several properties by separating them with commas.
sentDate=cm:sentdate
originator=cm:originator, cm:author
addressee=cm:addressee
addressees=cm:addressees
subjectLine=cm:subjectline, cm:description

View File

@@ -32,10 +32,8 @@ import java.util.Map;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.PropertyCheck;
import org.alfresco.util.TempFileProvider;
@@ -48,25 +46,33 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
* Extracts the following values from Star Office documents:
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>description:</b> -- cm:description
* </pre>
*
* @author Jesper Steen Møller
*/
public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
public class OpenOfficeMetadataExtracter extends AbstractMappingMetadataExtracter
{
private static final String KEY_AUTHOR = "author";
private static final String KEY_TITLE = "title";
private static final String KEY_DESCRIPTION = "description";
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS,
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS
// Add the other OpenOffice.org stuff here
// In fact, other types may apply as well, but should be counted as lower
// quality since they involve conversion.
};
private OpenOfficeConnection connection;
public OpenOfficeMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
}
public void setConnection(OpenOfficeConnection connection)
@@ -119,8 +125,11 @@ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
return connection.isConnected();
}
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
@Override
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
Map<String, Serializable> rawProperties = newRawMap();
String sourceMimetype = reader.getMimetype();
// create temporary files to convert from and to
@@ -154,24 +163,17 @@ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
infoSupplier
.getDocumentInfo());
// Titled aspect
trimPut(ContentModel.PROP_TITLE, propSet.getPropertyValue("Title"), destination);
trimPut(ContentModel.PROP_DESCRIPTION, propSet.getPropertyValue("Subject"), destination);
// Auditable aspect
// trimPut(ContentModel.PROP_CREATED,
// si.getCreateDateTime(), destination);
trimPut(ContentModel.PROP_AUTHOR, propSet.getPropertyValue("Author"), destination);
// trimPut(ContentModel.PROP_MODIFIED,
// si.getLastSaveDateTime(), destination);
// trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(),
// destination);
putRawValue(KEY_TITLE, propSet.getPropertyValue("Title").toString(), rawProperties);
putRawValue(KEY_DESCRIPTION, propSet.getPropertyValue("Subject").toString(), rawProperties);
putRawValue(KEY_AUTHOR, propSet.getPropertyValue("Author").toString(), rawProperties);
}
finally
{
document.dispose();
}
}
// Done
return rawProperties;
}
public String toUrl(File file, OpenOfficeConnection connection) throws ConnectException

View File

@@ -0,0 +1,12 @@
#
# OpenOfficeMetadataExtracter - default mapping
#
# Maps the raw keys emitted by OpenOfficeMetadataExtracter to the qualified
# property names that the values should be written to.
#
# Raw keys produced by the extracter (document-info property in parentheses):
#   author      - ("Author")
#   title       - ("Title")
#   description - ("Subject")
#
# author: Derek Hulley
# Namespaces
# Each "namespace.prefix.<prefix>" entry declares a prefix usable in the mappings below.
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
# Format: <raw key>=<prefix>:<local name>
author=cm:author
title=cm:title
description=cm:description