diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml
index c8d813b7fb..57634868a1 100644
--- a/config/alfresco/content-services-context.xml
+++ b/config/alfresco/content-services-context.xml
@@ -112,6 +112,7 @@
+
diff --git a/config/alfresco/model/contentModel.xml b/config/alfresco/model/contentModel.xml
index dc45553c07..06ea1289ff 100644
--- a/config/alfresco/model/contentModel.xml
+++ b/config/alfresco/model/contentModel.xml
@@ -649,6 +649,29 @@
+
+ Emailed
+
+
+ Originator
+ d:text
+
+
+ Addressee
+ d:text
+
+
+ Addressees
+ d:text
+ true
+
+
+ Sent Date
+ d:datetime
+
+
+
+
diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
index 3a8b8d4f3e..50548b8089 100644
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -31,11 +31,11 @@ import org.apache.commons.logging.LogFactory;
/**
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
- private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
+ protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
private MimetypeService mimetypeService;
private MetadataExtracterRegistry registry;
diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
index 51ead94f88..bbb17d5153 100644
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -38,7 +38,7 @@ import org.springframework.context.ApplicationContext;
* @see org.alfresco.repo.content.metadata.MetadataExtracter
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public abstract class AbstractMetadataExtracterTest extends TestCase
{
diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
index 63b731e3c2..d8c4657c50 100644
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -38,7 +38,7 @@ import org.alfresco.service.namespace.QName;
/**
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
{
diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
index a4ed6efaba..986c67a9d4 100644
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -19,7 +19,7 @@ package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
/**
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{
diff --git a/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
new file mode 100644
index 0000000000..6f527ece14
--- /dev/null
+++ b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ * http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.NamespaceService;
+import org.alfresco.service.namespace.QName;
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+
+/**
+ * Outlook format email meta-data extractor.
+ * <p>
+ * Registers a POIFS listener on the message content and copies the sender,
+ * receiver, recipient list and sent date streams into the destination map.
+ *
+ * @author Kevin Roast
+ */
+public class MailMetadataExtracter extends AbstractMetadataExtracter
+{
+    public static String[] SUPPORTED_MIMETYPES = new String[] {
+        "message/rfc822"};
+
+    private static final String SUBSTG_MESSAGEBODY = "__substg1.0_1000001E";
+    private static final String SUBSTG_RECIPIENTEMAIL = "__substg1.0_39FE001E";
+    private static final String SUBSTG_RECEIVEDEMAIL = "__substg1.0_0076001E";
+    private static final String SUBSTG_SENDEREMAIL = "__substg1.0_0C1F001E";
+    private static final String SUBSTG_DATE = "__substg1.0_00470102";
+
+    private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed");
+    private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate");
+    private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator");
+    private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee");
+    private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees");
+
+    /** Passes the supported mimetypes to the base extracter. */
+    public MailMetadataExtracter()
+    {
+        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
+    }
+
+    /**
+     * Reads the message streams from <code>reader</code> and stores the values
+     * found into <code>destination</code>.
+     *
+     * @param reader source of the message content
+     * @param destination map receiving the extracted properties
+     * @throws Throwable if extraction fails; an IOException raised while reading the container is logged, not rethrown
+     */
+    public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
+    {
+        // Per-call accumulator for recipient addresses. A local (rather than
+        // instance state) means nothing is left behind on the calling thread.
+        final List<String> recipientEmails = new ArrayList<String>();
+
+        POIFSReaderListener readerListener = new POIFSReaderListener()
+        {
+            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
+            {
+                try
+                {
+                    String name = event.getName();
+
+                    if (name.equals(SUBSTG_RECIPIENTEMAIL)) // a recipient email address
+                    {
+                        String emailAddress = readPlainTextStream(event.getStream());
+                        recipientEmails.add(convertExchangeAddress(emailAddress));
+                    }
+                    else if (name.equals(SUBSTG_RECEIVEDEMAIL)) // receiver email address
+                    {
+                        String emailAddress = readPlainTextStream(event.getStream());
+                        destination.put(PROP_ADDRESSEE, convertExchangeAddress(emailAddress));
+                    }
+                    else if (name.equals(SUBSTG_SENDEREMAIL)) // sender email - NOTE either email OR full Exchange data e.g. : /O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=MIKE.FARMAN@BEN
+                    {
+                        String emailAddress = readPlainTextStream(event.getStream());
+                        destination.put(PROP_ORIGINATOR, convertExchangeAddress(emailAddress));
+                    }
+                    else if (name.equals(SUBSTG_DATE))
+                    {
+                        // the date is not really plain text - but it's easier to parse as such
+                        Date sentDate = parseSentDate(readPlainTextStream(event.getStream()));
+                        if (sentDate != null)
+                        {
+                            destination.put(PROP_SENTDATE, sentDate);
+                        }
+                    }
+                }
+                catch (Exception ex)
+                {
+                    throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
+                }
+            }
+        };
+
+        InputStream is = null;
+        try
+        {
+            is = reader.getContentInputStream();
+            POIFSReader poiFSReader = new POIFSReader();
+            poiFSReader.registerListener(readerListener);
+
+            try
+            {
+                poiFSReader.read(is);
+            }
+            catch (IOException err)
+            {
+                // probably not an Outlook format MSG - ignore for now
+                logger.warn("Unable to extract meta-data from message: " + err.getMessage());
+            }
+
+            // store multi-value extracted property
+            if (!recipientEmails.isEmpty())
+            {
+                destination.put(PROP_ADDRESSEES, (Serializable)recipientEmails);
+            }
+        }
+        finally
+        {
+            if (is != null)
+            {
+                try
+                {
+                    is.close();
+                }
+                catch (IOException ignored)
+                {
+                    // best-effort close; extraction has already finished
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses the sent date out of the text read from the date stream.
+     * <p>
+     * Looks for <code>l=</code>, then the first <code>-</code> after it, and reads
+     * the two-digit year, month, day, hour and minute fields that follow.
+     *
+     * @param text raw stream content
+     * @return the parsed date, or <code>null</code> if the markers are absent
+     */
+    @SuppressWarnings("deprecation")
+    private static Date parseSentDate(String text)
+    {
+        int valueIndex = text.indexOf("l=");
+        if (valueIndex == -1)
+        {
+            return null;
+        }
+        int dateIndex = text.indexOf('-', valueIndex);
+        if (dateIndex == -1)
+        {
+            return null;
+        }
+        dateIndex++;
+
+        // java.util.Date counts years from 1900; two-digit years are taken as 20xx
+        int year = Integer.parseInt(text.substring(dateIndex, dateIndex + 2)) + (2000 - 1900);
+        // months are zero-based in java.util.Date
+        int month = Integer.parseInt(text.substring(dateIndex + 2, dateIndex + 4)) - 1;
+        int day = Integer.parseInt(text.substring(dateIndex + 4, dateIndex + 6));
+        int hour = Integer.parseInt(text.substring(dateIndex + 6, dateIndex + 8));
+        // minutes directly follow the hour field (the previous +10 offset skipped them)
+        int minute = Integer.parseInt(text.substring(dateIndex + 8, dateIndex + 10));
+
+        // NOTE(review): interpreted in the JVM default time zone - confirm the stamp is not UTC
+        return new Date(year, month, day, hour, minute);
+    }
+
+    /**
+     * Reads the available content of the stream and decodes it with the platform default charset.
+     *
+     * @param stream stream to read
+     * @return the decoded content
+     * @throws IOException if the stream cannot be read
+     */
+    private static String readPlainTextStream(DocumentInputStream stream)
+        throws IOException
+    {
+        byte[] data = new byte[stream.available()];
+        int total = 0;
+        // a single read() may return fewer bytes than requested, so loop until full
+        while (total < data.length)
+        {
+            int count = stream.read(data, total, data.length - total);
+            if (count <= 0)
+            {
+                break;
+            }
+            total += count;
+        }
+        // decode only what was actually read
+        return new String(data, 0, total);
+    }
+
+    /**
+     * Reduces an Exchange-style address to the text after its last <code>/CN=</code>.
+     *
+     * @param email plain email address or Exchange-style address
+     * @return the text after the last <code>/CN=</code>, or the input unchanged if absent
+     */
+    private static String convertExchangeAddress(String email)
+    {
+        int cnIndex = email.lastIndexOf("/CN=");
+        if (cnIndex == -1)
+        {
+            return email;
+        }
+        // found a full Exchange format To header
+        return email.substring(cnIndex + 4);
+    }
+}
diff --git a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
index 50b61930da..1cc07c5dc7 100644
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -25,7 +25,7 @@ import org.alfresco.service.namespace.QName;
/**
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public interface MetadataExtracter
{
diff --git a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
index 0a3fd4fe1a..8dd87fb63b 100644
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -36,7 +36,7 @@ import org.apache.commons.logging.LogFactory;
* The extracters themselves know how well they are able to extract metadata.
*
* @see org.alfresco.repo.content.metadata.MetadataExtracter
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class MetadataExtracterRegistry
{
diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
index 250f9bdfc2..179be80aa7 100644
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -28,7 +28,6 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
-import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
@@ -37,15 +36,16 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
/**
+ * Office file format Metadata Extracter
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_WORD,
MimetypeMap.MIMETYPE_EXCEL,
- MimetypeMap.MIMETYPE_PPT };
+ MimetypeMap.MIMETYPE_PPT};
public OfficeMetadataExtracter()
{
@@ -64,6 +64,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
if (ps instanceof SummaryInformation)
{
SummaryInformation si = (SummaryInformation) ps;
+
// Titled aspect
trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
@@ -73,16 +74,6 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination);
trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination);
}
- else if (ps instanceof DocumentSummaryInformation)
- {
-// DocumentSummaryInformation dsi = (DocumentSummaryInformation) ps;
-
- // These are not really interesting to any aspect:
- // trimPut(ContentModel.PROP_xxx, dsi.getCompany(),
- // destination);
- // trimPut(ContentModel.PROP_yyy, dsi.getManager(),
- // destination);
- }
}
catch (Exception ex)
{
@@ -90,6 +81,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
}
}
};
+
InputStream is = null;
try
{
diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
index 2630ee4ab1..6249415fdd 100644
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
@@ -4,7 +4,7 @@ package org.alfresco.repo.content.metadata;
/**
* @see org.alfresco.repo.content.transform.OfficeMetadataExtracter
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
diff --git a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java
index 754fc952c2..389b5a46bf 100644
--- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java
@@ -42,7 +42,7 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
- * @author Jesper Steen M�ller
+ * @author Jesper Steen Møller
*/
public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
{
diff --git a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java
index 26bf1a28f1..9648c52bba 100644
--- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java
@@ -21,7 +21,7 @@ import net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection;
/**
- * @author Jesper Steen M�ller
+ * @author Jesper Steen Møller
*/
public class OpenOfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
index e335c6cf83..5f0d796058 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -31,7 +31,7 @@ import org.pdfbox.pdmodel.PDDocumentInformation;
/**
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
{
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
index 6b82efa45e..70049a7e92 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -5,7 +5,7 @@ import org.alfresco.repo.content.MimetypeMap;
/**
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
*
- * @author Jesper Steen Møller
+ * @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{