mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Final fix for AR-357: Metadata extractors are configurable
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6246 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -28,7 +28,6 @@ import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@@ -39,17 +38,26 @@ import javax.swing.text.html.HTML;
|
||||
import javax.swing.text.html.HTMLEditorKit;
|
||||
import javax.swing.text.html.parser.ParserDelegator;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
* Extracts the following values from HTML documents:
|
||||
* <pre>
|
||||
* <b>author:</b> -- cm:author
|
||||
* <b>title:</b> -- cm:title
|
||||
* <b>description:</b> -- cm:description
|
||||
* </pre>
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
public class HtmlMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
private static final String KEY_AUTHOR = "author";
|
||||
private static final String KEY_TITLE = "title";
|
||||
private static final String KEY_DESCRIPTION= "description";
|
||||
|
||||
private static final Set<String> MIMETYPES = new HashSet<String>(5);
|
||||
static
|
||||
{
|
||||
@@ -59,12 +67,13 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
|
||||
public HtmlMetadataExtracter()
|
||||
{
|
||||
super(MIMETYPES, 1.0, 1000);
|
||||
super(MIMETYPES);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
|
||||
@Override
|
||||
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
final Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
|
||||
final Map<String, Serializable> rawProperties = newRawMap();
|
||||
|
||||
HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
|
||||
{
|
||||
@@ -106,7 +115,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
}
|
||||
else if (HTML.Tag.TITLE.equals(t) && title != null)
|
||||
{
|
||||
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
|
||||
putRawValue(KEY_TITLE, title.toString(), rawProperties);
|
||||
title = null;
|
||||
}
|
||||
}
|
||||
@@ -125,11 +134,11 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
|
||||
|| name.equalsIgnoreCase("dc.creator"))
|
||||
{
|
||||
trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
|
||||
putRawValue(KEY_AUTHOR, valueO.toString(), rawProperties);
|
||||
}
|
||||
if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
|
||||
else if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
|
||||
{
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
|
||||
putRawValue(KEY_DESCRIPTION, valueO.toString(), rawProperties);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -143,7 +152,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
int tries = 0;
|
||||
while (tries < 3)
|
||||
{
|
||||
tempDestination.clear();
|
||||
rawProperties.clear();
|
||||
Reader r = null;
|
||||
InputStream cis = null;
|
||||
try
|
||||
@@ -153,7 +162,6 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
r = new InputStreamReader(cis);
|
||||
HTMLEditorKit.Parser parser = new ParserDelegator();
|
||||
parser.parse(r, callback, tries > 0);
|
||||
destination.putAll(tempDestination);
|
||||
break;
|
||||
}
|
||||
catch (ChangedCharSetException ccse)
|
||||
@@ -173,5 +181,7 @@ public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
cis.close();
|
||||
}
|
||||
}
|
||||
// Done
|
||||
return rawProperties;
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,12 @@
|
||||
#
|
||||
# HtmlMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
@@ -26,13 +26,12 @@ package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.TempFileProvider;
|
||||
import org.farng.mp3.AbstractMP3FragmentBody;
|
||||
import org.farng.mp3.MP3File;
|
||||
@@ -44,30 +43,46 @@ import org.farng.mp3.lyrics3.Lyrics3v2;
|
||||
import org.farng.mp3.lyrics3.Lyrics3v2Field;
|
||||
|
||||
/**
|
||||
* Extracts the following values from MP3 files:
|
||||
* <pre>
|
||||
* <b>songTitle:</b> -- {music}songTitle, cm:title
|
||||
* <b>albumTitle:</b> -- {music}albumTitle
|
||||
* <b>artist:</b> -- {music}artist, cm:author
|
||||
* <b>description:</b> -- cm:description
|
||||
* <b>comment:</b> -- {music}comment
|
||||
* <b>yearReleased:</b> -- {music}yearReleased
|
||||
* <b>trackNumber:</b> -- {music}trackNumber
|
||||
* <b>genre:</b> -- {music}genre
|
||||
* <b>composer:</b> -- {music}composer
|
||||
* <b>lyrics:</b> -- {music}lyrics
|
||||
* </pre>
|
||||
*
|
||||
* @author Roy Wetherall
|
||||
*/
|
||||
public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
public class MP3MetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
private static final QName PROP_ALBUM_TITLE = QName.createQName("{music}albumTitle");
|
||||
private static final QName PROP_SONG_TITLE = QName.createQName("{music}songTitle");;
|
||||
private static final QName PROP_ARTIST = QName.createQName("{music}artist");;
|
||||
private static final QName PROP_COMMENT = QName.createQName("{music}comment");;
|
||||
private static final QName PROP_YEAR_RELEASED = QName.createQName("{music}yearReleased");;
|
||||
private static final QName PROP_TRACK_NUMBER = QName.createQName("{music}trackNumber");;
|
||||
private static final QName PROP_GENRE = QName.createQName("{music}genre");;
|
||||
private static final QName PROP_COMPOSER = QName.createQName("{music}composer");;
|
||||
private static final QName PROP_LYRICS = QName.createQName("{music}lyrics");;
|
||||
private static final String KEY_SONG_TITLE = "songTitle";
|
||||
private static final String KEY_ALBUM_TITLE = "albumTitle";
|
||||
private static final String KEY_ARTIST = "artist";
|
||||
private static final String KEY_DESCRIPTION = "description";
|
||||
private static final String KEY_COMMENT = "comment";
|
||||
private static final String KEY_YEAR_RELEASED = "yearReleased";
|
||||
private static final String KEY_TRACK_NUMBER = "trackNumber";
|
||||
private static final String KEY_GENRE = "genre";
|
||||
private static final String KEY_COMPOSER = "composer";
|
||||
private static final String KEY_LYRICS = "lyrics";
|
||||
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_MP3 };
|
||||
|
||||
public MP3MetadataExtracter()
|
||||
{
|
||||
super(MimetypeMap.MIMETYPE_MP3, 1.0, 1000);
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
}
|
||||
|
||||
public void extractInternal(
|
||||
ContentReader reader,
|
||||
Map<QName, Serializable> destination) throws Throwable
|
||||
@Override
|
||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
Map<QName, Serializable> props = new HashMap<QName, Serializable>();
|
||||
Map<String, Serializable> rawProperties = newRawMap();
|
||||
|
||||
// Create a temp file
|
||||
File tempFile = TempFileProvider.createTempFile("MP3MetadataExtracter_", ".tmp");
|
||||
@@ -81,30 +96,30 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
ID3v1 id3v1 = mp3File.getID3v1Tag();
|
||||
if (id3v1 != null)
|
||||
{
|
||||
setTagValue(props, PROP_ALBUM_TITLE, id3v1.getAlbum());
|
||||
setTagValue(props, PROP_SONG_TITLE, id3v1.getTitle());
|
||||
setTagValue(props, PROP_ARTIST, id3v1.getArtist());
|
||||
setTagValue(props, PROP_COMMENT, id3v1.getComment());
|
||||
setTagValue(props, PROP_YEAR_RELEASED, id3v1.getYear());
|
||||
putRawValue(KEY_ALBUM_TITLE, id3v1.getAlbum(), rawProperties);
|
||||
putRawValue(KEY_SONG_TITLE, id3v1.getTitle(), rawProperties);
|
||||
putRawValue(KEY_ARTIST, id3v1.getArtist(), rawProperties);
|
||||
putRawValue(KEY_COMMENT, id3v1.getComment(), rawProperties);
|
||||
putRawValue(KEY_YEAR_RELEASED, id3v1.getYear(), rawProperties);
|
||||
|
||||
// TODO sort out the genre
|
||||
//setTagValue(props, MusicModel.PROP_GENRE, id3v1.getGenre());
|
||||
//putRawValue(MusicModel.KEY_GENRE, id3v1.getGenre());
|
||||
|
||||
// TODO sort out the size
|
||||
//setTagValue(props, MusicModel.PROP_SIZE, id3v1.getSize());
|
||||
//putRawValue(MusicModel.KEY_SIZE, id3v1.getSize());
|
||||
}
|
||||
|
||||
AbstractID3v2 id3v2 = mp3File.getID3v2Tag();
|
||||
if (id3v2 != null)
|
||||
{
|
||||
setTagValue(props, PROP_SONG_TITLE, getID3V2Value(id3v2, "TIT2"));
|
||||
setTagValue(props, PROP_ARTIST, getID3V2Value(id3v2, "TPE1"));
|
||||
setTagValue(props, PROP_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"));
|
||||
setTagValue(props, PROP_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"));
|
||||
setTagValue(props, PROP_COMMENT, getID3V2Value(id3v2, "COMM"));
|
||||
setTagValue(props, PROP_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"));
|
||||
setTagValue(props, PROP_GENRE, getID3V2Value(id3v2, "TCON"));
|
||||
setTagValue(props, PROP_COMPOSER, getID3V2Value(id3v2, "TCOM"));
|
||||
putRawValue(KEY_SONG_TITLE, getID3V2Value(id3v2, "TIT2"), rawProperties);
|
||||
putRawValue(KEY_ARTIST, getID3V2Value(id3v2, "TPE1"), rawProperties);
|
||||
putRawValue(KEY_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"), rawProperties);
|
||||
putRawValue(KEY_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"), rawProperties);
|
||||
putRawValue(KEY_COMMENT, getID3V2Value(id3v2, "COMM"), rawProperties);
|
||||
putRawValue(KEY_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"), rawProperties);
|
||||
putRawValue(KEY_GENRE, getID3V2Value(id3v2, "TCON"), rawProperties);
|
||||
putRawValue(KEY_COMPOSER, getID3V2Value(id3v2, "TCOM"), rawProperties);
|
||||
|
||||
// TODO sort out the lyrics
|
||||
//System.out.println("Lyrics: " + getID3V2Value(id3v2, "SYLT"));
|
||||
@@ -117,12 +132,12 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
System.out.println("Lyrics3 tag found.");
|
||||
if (lyrics3Tag instanceof Lyrics3v2)
|
||||
{
|
||||
setTagValue(props, PROP_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"));
|
||||
setTagValue(props, PROP_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"));
|
||||
setTagValue(props, PROP_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"));
|
||||
setTagValue(props, PROP_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"));
|
||||
setTagValue(props, PROP_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"));
|
||||
setTagValue(props, PROP_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"));
|
||||
putRawValue(KEY_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"), rawProperties);
|
||||
putRawValue(KEY_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"), rawProperties);
|
||||
putRawValue(KEY_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"), rawProperties);
|
||||
putRawValue(KEY_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"), rawProperties);
|
||||
putRawValue(KEY_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"), rawProperties);
|
||||
putRawValue(KEY_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"), rawProperties);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -132,20 +147,14 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
tempFile.delete();
|
||||
}
|
||||
|
||||
// Set the destination values
|
||||
if (props.get(PROP_SONG_TITLE) != null)
|
||||
{
|
||||
destination.put(ContentModel.PROP_TITLE, props.get(PROP_SONG_TITLE));
|
||||
}
|
||||
if (props.get(PROP_ARTIST) != null)
|
||||
{
|
||||
destination.put(ContentModel.PROP_AUTHOR, props.get(PROP_ARTIST));
|
||||
}
|
||||
String description = getDescription(props);
|
||||
String description = getDescription(rawProperties);
|
||||
if (description != null)
|
||||
{
|
||||
destination.put(ContentModel.PROP_DESCRIPTION, description);
|
||||
putRawValue(KEY_DESCRIPTION, description, rawProperties);
|
||||
}
|
||||
|
||||
// Done
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
|
||||
@@ -155,17 +164,17 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
* @param props the properties extracted from the file
|
||||
* @return the description
|
||||
*/
|
||||
private String getDescription(Map<QName, Serializable> props)
|
||||
private String getDescription(Map<String, Serializable> props)
|
||||
{
|
||||
StringBuilder result = new StringBuilder();
|
||||
if (props.get(PROP_SONG_TITLE) != null && props.get(PROP_ARTIST) != null && props.get(PROP_ALBUM_TITLE) != null)
|
||||
if (props.get(KEY_SONG_TITLE) != null && props.get(KEY_ARTIST) != null && props.get(KEY_ALBUM_TITLE) != null)
|
||||
{
|
||||
result
|
||||
.append(props.get(PROP_SONG_TITLE))
|
||||
.append(props.get(KEY_SONG_TITLE))
|
||||
.append(" - ")
|
||||
.append(props.get(PROP_ALBUM_TITLE))
|
||||
.append(props.get(KEY_ALBUM_TITLE))
|
||||
.append(" (")
|
||||
.append(props.get(PROP_ARTIST))
|
||||
.append(props.get(KEY_ARTIST))
|
||||
.append(")");
|
||||
|
||||
}
|
||||
@@ -173,26 +182,6 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param props
|
||||
* @param propQName
|
||||
* @param propvalue
|
||||
*/
|
||||
private void setTagValue(Map<QName, Serializable> props, QName propQName, String propvalue)
|
||||
{
|
||||
if (propvalue != null && propvalue.length() != 0)
|
||||
{
|
||||
trimPut(propQName, propvalue, props);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param lyrics3Tag
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
private String getLyrics3v2Value(Lyrics3v2 lyrics3Tag, String name)
|
||||
{
|
||||
String result = "";
|
||||
@@ -210,10 +199,6 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
|
||||
/**
|
||||
* Get the ID3V2 tag value in a safe way
|
||||
*
|
||||
* @param id3v2
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
private String getID3V2Value(AbstractID3v2 id3v2, String name)
|
||||
{
|
||||
@@ -231,5 +216,4 @@ public class MP3MetadataExtracter extends AbstractMetadataExtracter
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -0,0 +1,20 @@
|
||||
#
|
||||
# MP3MetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
namespace.prefix.music=music
|
||||
|
||||
# Mappings
|
||||
songTitle=music:songTitle, cm:title
|
||||
albumTitle=music:albumTitle
|
||||
artist=music:artist, cm:author
|
||||
description=cm:description
|
||||
comment=music:comment
|
||||
yearReleased=music:yearReleased
|
||||
trackNumber=music:trackNumber
|
||||
genre=music:genre
|
||||
composer=music:composer
|
||||
lyrics=music:lyrics
|
@@ -30,29 +30,39 @@ import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
|
||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||
|
||||
/**
|
||||
* Outlook format email meta-data extractor
|
||||
* Outlook format email meta-data extractor extracting the following values:
|
||||
* <pre>
|
||||
* <b>sentDate:</b> -- cm:sentdate
|
||||
* <b>originator:</b> -- cm:originator, cm:author
|
||||
* <b>addressee:</b> -- cm:addressee
|
||||
* <b>addressees:</b> -- cm:addressees
|
||||
* <b>subjectLine:</b> -- cm:subjectline, cm:description
|
||||
* </pre>
|
||||
*
|
||||
* @since 2.1
|
||||
* @author Kevin Roast
|
||||
*/
|
||||
public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
public class MailMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||
"message/rfc822"};
|
||||
private static final String KEY_SENT_DATE = "sentDate";
|
||||
private static final String KEY_ORIGINATOR = "originator";
|
||||
private static final String KEY_ADDRESSEE = "addressee";
|
||||
private static final String KEY_ADDRESSEES = "addressees";
|
||||
private static final String KEY_SUBJECT = "subjectLine";
|
||||
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {"message/rfc822"};
|
||||
|
||||
private static final String STREAM_PREFIX = "__substg1.0_";
|
||||
private static final int STREAM_PREFIX_LENGTH = STREAM_PREFIX.length();
|
||||
@@ -62,11 +72,14 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
|
||||
public MailMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
|
||||
@Override
|
||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
final Map<String, Serializable> rawProperties = newRawMap();
|
||||
|
||||
POIFSReaderListener readerListener = new POIFSReaderListener()
|
||||
{
|
||||
public void processPOIFSReaderEvent(final POIFSReaderEvent event)
|
||||
@@ -76,7 +89,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
if (event.getName().startsWith(STREAM_PREFIX))
|
||||
{
|
||||
StreamHandler handler = new StreamHandler(event.getName(), event.getStream());
|
||||
handler.process(destination);
|
||||
handler.process(rawProperties);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -109,7 +122,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
// store multi-value extracted property
|
||||
if (this.receipientEmails.get().size() != 0)
|
||||
{
|
||||
destination.put(ContentModel.PROP_ADDRESSEES, (Serializable)receipientEmails.get());
|
||||
putRawValue(KEY_ADDRESSEES, (Serializable)receipientEmails.get(), rawProperties);
|
||||
}
|
||||
}
|
||||
finally
|
||||
@@ -119,6 +132,8 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
// Done
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
private static String convertExchangeAddress(String email)
|
||||
@@ -138,6 +153,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
private static final String ENCODING_BINARY = "0102";
|
||||
private static final String ENCODING_UNICODE = "001F";
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static final String SUBSTG_MESSAGEBODY = "1000";
|
||||
private static final String SUBSTG_RECIPIENTEMAIL = "39FE"; // 7bit email address
|
||||
private static final String SUBSTG_RECIPIENTSEARCH = "300B"; // address 'search' variant
|
||||
@@ -158,12 +174,12 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
this.stream = stream;
|
||||
}
|
||||
|
||||
void process(final Map<QName, Serializable> destination)
|
||||
void process(final Map<String, Serializable> destination)
|
||||
throws IOException
|
||||
{
|
||||
if (type.equals(SUBSTG_SENDEREMAIL))
|
||||
{
|
||||
destination.put(ContentModel.PROP_ORIGINATOR, convertExchangeAddress(extractText()));
|
||||
putRawValue(KEY_ORIGINATOR, convertExchangeAddress(extractText()), destination);
|
||||
}
|
||||
else if (type.equals(SUBSTG_RECIPIENTEMAIL))
|
||||
{
|
||||
@@ -192,11 +208,11 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
}
|
||||
else if (type.equals(SUBSTG_RECEIVEDEMAIL))
|
||||
{
|
||||
destination.put(ContentModel.PROP_ADDRESSEE, convertExchangeAddress(extractText()));
|
||||
putRawValue(KEY_ADDRESSEE, convertExchangeAddress(extractText()), destination);
|
||||
}
|
||||
else if (type.equals(SUBSTG_SUBJECT))
|
||||
{
|
||||
destination.put(ContentModel.PROP_SUBJECT, extractText());
|
||||
putRawValue(KEY_SUBJECT, extractText(), destination);
|
||||
}
|
||||
else if (type.equals(SUBSTG_DATE))
|
||||
{
|
||||
@@ -221,7 +237,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
||||
String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
|
||||
c.set(Calendar.MINUTE, Integer.parseInt(strMinute));
|
||||
c.set(Calendar.SECOND, 0);
|
||||
destination.put(ContentModel.PROP_SENTDATE, c.getTime());
|
||||
putRawValue(KEY_SENT_DATE, c.getTime(), destination);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,14 @@
|
||||
#
|
||||
# MailMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
sentDate=cm:sentdate
|
||||
originator=cm:originator, cm:author
|
||||
addressee=cm:addressee
|
||||
addressees=cm:addressees
|
||||
subjectLine=cm:subjectline, cm:description
|
@@ -32,10 +32,8 @@ import java.util.Map;
|
||||
|
||||
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.PropertyCheck;
|
||||
import org.alfresco.util.TempFileProvider;
|
||||
|
||||
@@ -48,25 +46,33 @@ import com.sun.star.ucb.XFileIdentifierConverter;
|
||||
import com.sun.star.uno.UnoRuntime;
|
||||
|
||||
/**
|
||||
* Extracts values from Star Office documents into the following:
|
||||
* <pre>
|
||||
* <b>author:</b> -- cm:author
|
||||
* <b>title:</b> -- cm:title
|
||||
* <b>description:</b> -- cm:description
|
||||
* </pre>
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
|
||||
public class OpenOfficeMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
private static final String KEY_AUTHOR = "author";
|
||||
private static final String KEY_TITLE = "title";
|
||||
private static final String KEY_DESCRIPTION = "description";
|
||||
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
|
||||
MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS,
|
||||
MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER,
|
||||
MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS
|
||||
// Add the other OpenOffice.org stuff here
|
||||
// In fact, other types may apply as well, but should be counted as lower
|
||||
// quality since they involve conversion.
|
||||
};
|
||||
|
||||
private OpenOfficeConnection connection;
|
||||
|
||||
public OpenOfficeMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
}
|
||||
|
||||
public void setConnection(OpenOfficeConnection connection)
|
||||
@@ -119,8 +125,11 @@ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
|
||||
return connection.isConnected();
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
|
||||
@Override
|
||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
Map<String, Serializable> rawProperties = newRawMap();
|
||||
|
||||
String sourceMimetype = reader.getMimetype();
|
||||
|
||||
// create temporary files to convert from and to
|
||||
@@ -154,24 +163,17 @@ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
|
||||
infoSupplier
|
||||
.getDocumentInfo());
|
||||
|
||||
// Titled aspect
|
||||
trimPut(ContentModel.PROP_TITLE, propSet.getPropertyValue("Title"), destination);
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, propSet.getPropertyValue("Subject"), destination);
|
||||
|
||||
// Auditable aspect
|
||||
// trimPut(ContentModel.PROP_CREATED,
|
||||
// si.getCreateDateTime(), destination);
|
||||
trimPut(ContentModel.PROP_AUTHOR, propSet.getPropertyValue("Author"), destination);
|
||||
// trimPut(ContentModel.PROP_MODIFIED,
|
||||
// si.getLastSaveDateTime(), destination);
|
||||
// trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(),
|
||||
// destination);
|
||||
putRawValue(KEY_TITLE, propSet.getPropertyValue("Title").toString(), rawProperties);
|
||||
putRawValue(KEY_DESCRIPTION, propSet.getPropertyValue("Subject").toString(), rawProperties);
|
||||
putRawValue(KEY_AUTHOR, propSet.getPropertyValue("Author").toString(), rawProperties);
|
||||
}
|
||||
finally
|
||||
{
|
||||
document.dispose();
|
||||
}
|
||||
}
|
||||
// Done
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
public String toUrl(File file, OpenOfficeConnection connection) throws ConnectException
|
||||
|
@@ -0,0 +1,12 @@
|
||||
#
|
||||
# OpenOfficeMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
Reference in New Issue
Block a user