diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml index c8d813b7fb..57634868a1 100644 --- a/config/alfresco/content-services-context.xml +++ b/config/alfresco/content-services-context.xml @@ -112,6 +112,7 @@ + diff --git a/config/alfresco/model/contentModel.xml b/config/alfresco/model/contentModel.xml index dc45553c07..06ea1289ff 100644 --- a/config/alfresco/model/contentModel.xml +++ b/config/alfresco/model/contentModel.xml @@ -649,6 +649,29 @@ + + Emailed + + + Originator + d:text + + + Addressee + d:text + + + Addressees + d:text + true + + + Sent Date + d:datetime + + + + diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java index 3a8b8d4f3e..50548b8089 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -31,11 +31,11 @@ import org.apache.commons.logging.LogFactory; /** * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ abstract public class AbstractMetadataExtracter implements MetadataExtracter { - private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class); + protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class); private MimetypeService mimetypeService; private MetadataExtracterRegistry registry; diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java index 51ead94f88..bbb17d5153 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -38,7 +38,7 @@ import org.springframework.context.ApplicationContext; * @see org.alfresco.repo.content.metadata.MetadataExtracter * @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public abstract class AbstractMetadataExtracterTest extends TestCase { diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java index 63b731e3c2..d8c4657c50 100644 --- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -38,7 +38,7 @@ import org.alfresco.service.namespace.QName; /** * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class HtmlMetadataExtracter extends AbstractMetadataExtracter { diff --git a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java index a4ed6efaba..986c67a9d4 100644 --- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -19,7 +19,7 @@ package org.alfresco.repo.content.metadata; import org.alfresco.repo.content.MimetypeMap; /** - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest { diff --git a/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java new file mode 100644 index 0000000000..6f527ece14 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2005 Jesper Steen M�ller + * + * Licensed under the Mozilla Public License version 1.1 + * with a permitted attribution clause. You may obtain a + * copy of the License at + * + * http://www.alfresco.org/legal/license.txt + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific + * language governing permissions and limitations under the + * License. + */ +package org.alfresco.repo.content.metadata; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +import org.alfresco.service.cmr.repository.ContentIOException; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.namespace.NamespaceService; +import org.alfresco.service.namespace.QName; +import org.apache.poi.poifs.eventfilesystem.POIFSReader; +import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; +import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; +import org.apache.poi.poifs.filesystem.DocumentInputStream; + +/** + * Outlook format email meta-data extractor + * + * @author Kevin Roast + */ +public class MailMetadataExtracter extends AbstractMetadataExtracter +{ + public static String[] SUPPORTED_MIMETYPES = new String[] { + "message/rfc822"}; + + private static final String SUBSTG_MESSAGEBODY = "__substg1.0_1000001E"; + private static final String SUBSTG_RECIPIENTEMAIL = "__substg1.0_39FE001E"; + private static final String SUBSTG_RECEIVEDEMAIL = "__substg1.0_0076001E"; + private static final String SUBSTG_SENDEREMAIL = "__substg1.0_0C1F001E"; + private static final String SUBSTG_DATE = "__substg1.0_00470102"; + + private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed"); + private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate"); + private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator"); + private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee"); + private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees"); + + // the CC: email addresses + private ThreadLocal> receipientEmails = new ThreadLocal>(); + + public MailMetadataExtracter() + { + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000); + } + + public void extractInternal(ContentReader reader, final Map destination) throws Throwable + { + POIFSReaderListener readerListener = new POIFSReaderListener() + { + public void processPOIFSReaderEvent(final POIFSReaderEvent event) + { + try + { + String name = event.getName(); + + if (name.equals(SUBSTG_RECIPIENTEMAIL)) // a recipient email address + { + String emailAddress = readPlainTextStream(event.getStream()); + receipientEmails.get().add(convertExchangeAddress(emailAddress)); + } + else if (name.equals(SUBSTG_RECEIVEDEMAIL)) // receiver email address + { + String emailAddress = readPlainTextStream(event.getStream()); + destination.put(PROP_ADDRESSEE, convertExchangeAddress(emailAddress)); + } + else if (name.equals(SUBSTG_SENDEREMAIL)) // sender email - NOTE either email OR full Exchange data e.g. : /O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=MIKE.FARMAN@BEN + { + String emailAddress = readPlainTextStream(event.getStream()); + destination.put(PROP_ORIGINATOR, convertExchangeAddress(emailAddress)); + } + else if (name.equals(SUBSTG_DATE)) + { + // the date is not really plain text - but it's easier to parse as such + String date = readPlainTextStream(event.getStream()); + int valueIndex = date.indexOf("l="); + if (valueIndex != -1) + { + int dateIndex = date.indexOf('-', valueIndex); + if (dateIndex != -1) + { + dateIndex++; + String strYear = date.substring(dateIndex, dateIndex + 2); + int year = Integer.parseInt(strYear) + (2000 - 1900); + String strMonth = date.substring(dateIndex + 2, dateIndex + 4); + int month = Integer.parseInt(strMonth) - 1; + String strDay = date.substring(dateIndex + 4, dateIndex + 6); + int day = Integer.parseInt(strDay); + String strHour = date.substring(dateIndex + 6, dateIndex + 8); + int hour = Integer.parseInt(strHour); + String strMinute = date.substring(dateIndex + 10, dateIndex + 12); + int minute = Integer.parseInt(strMinute); + destination.put(PROP_SENTDATE, new Date(year, month, day, hour, minute)); + } + } + } + } + catch (Exception ex) + { + throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex); + } + } + }; + + InputStream is = null; + try + { + this.receipientEmails.set(new ArrayList()); + + is = reader.getContentInputStream(); + POIFSReader poiFSReader = new POIFSReader(); + poiFSReader.registerListener(readerListener); + + try + { + poiFSReader.read(is); + } + catch (IOException err) + { + // probably not an Outlook format MSG - ignore for now + logger.warn("Unable to extract meta-data from message: " + err.getMessage()); + } + + // store multi-value extracted property + if (receipientEmails.get().size() != 0) + { + destination.put(PROP_ADDRESSEES, (Serializable)receipientEmails.get()); + } + } + finally + { + if (is != null) + { + try { is.close(); } catch (IOException e) {} + } + } + } + + private static String readPlainTextStream(DocumentInputStream stream) + throws IOException + { + byte[] data = new byte[stream.available()]; + int read = stream.read(data); + return new String(data); + } + + private static String convertExchangeAddress(String email) + { + if (email.lastIndexOf("/CN=") == -1) + { + return email; + } + else + { + // found a full Exchange format To header + return email.substring(email.lastIndexOf("/CN=") + 4); + } + } +} diff --git a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java index 50b61930da..1cc07c5dc7 100644 --- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -25,7 +25,7 @@ import org.alfresco.service.namespace.QName; /** * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public interface MetadataExtracter { diff --git a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java index 0a3fd4fe1a..8dd87fb63b 100644 --- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java +++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -36,7 +36,7 @@ import org.apache.commons.logging.LogFactory; * The extracters themselves know how well they are able to extract metadata. * * @see org.alfresco.repo.content.metadata.MetadataExtracter - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class MetadataExtracterRegistry { diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java index 250f9bdfc2..179be80aa7 100644 --- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -28,7 +28,6 @@ import org.alfresco.repo.content.MimetypeMap; import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.namespace.QName; -import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hpsf.PropertySet; import org.apache.poi.hpsf.PropertySetFactory; import org.apache.poi.hpsf.SummaryInformation; @@ -37,15 +36,16 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; /** + * Office file format Metadata Extracter * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class OfficeMetadataExtracter extends AbstractMetadataExtracter { public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL, - MimetypeMap.MIMETYPE_PPT }; + MimetypeMap.MIMETYPE_PPT}; public OfficeMetadataExtracter() { @@ -64,6 +64,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter if (ps instanceof SummaryInformation) { SummaryInformation si = (SummaryInformation) ps; + // Titled aspect trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination); trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination); @@ -73,16 +74,6 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination); trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination); } - else if (ps instanceof DocumentSummaryInformation) - { -// DocumentSummaryInformation dsi = (DocumentSummaryInformation) ps; - - // These are not really interesting to any aspect: - // trimPut(ContentModel.PROP_xxx, dsi.getCompany(), - // destination); - // trimPut(ContentModel.PROP_yyy, dsi.getManager(), - // destination); - } } catch (Exception ex) { @@ -90,6 +81,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter } } }; + InputStream is = null; try { diff --git a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java index 2630ee4ab1..6249415fdd 100644 --- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java @@ -4,7 +4,7 @@ package org.alfresco.repo.content.metadata; /** * @see org.alfresco.repo.content.transform.OfficeMetadataExtracter * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest { diff --git a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java index 754fc952c2..389b5a46bf 100644 --- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java @@ -42,7 +42,7 @@ import com.sun.star.ucb.XFileIdentifierConverter; import com.sun.star.uno.UnoRuntime; /** - * @author Jesper Steen M�ller + * @author Jesper Steen Møller */ public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter { diff --git a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java index 26bf1a28f1..9648c52bba 100644 --- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java @@ -21,7 +21,7 @@ import net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection; /** - * @author Jesper Steen M�ller + * @author Jesper Steen Møller */ public class OpenOfficeMetadataExtracterTest extends AbstractMetadataExtracterTest { diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java index e335c6cf83..5f0d796058 100644 --- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 Jesper Steen M�ller * * Licensed under the Mozilla Public License version 1.1 * with a permitted attribution clause. You may obtain a @@ -31,7 +31,7 @@ import org.pdfbox.pdmodel.PDDocumentInformation; /** * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter { diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java index 6b82efa45e..70049a7e92 100644 --- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java @@ -5,7 +5,7 @@ import org.alfresco.repo.content.MimetypeMap; /** * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter * - * @author Jesper Steen Møller + * @author Jesper Steen Møller */ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest {