mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-09-17 14:21:39 +00:00
- Added support for additional encodings to the Outlook email extractor; this fixes an issue where some emails were not being parsed.
- Added the email subject to the list of extracted values ("cm:subjectline").
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@3418 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -660,6 +660,10 @@
|
|||||||
<type>d:text</type>
|
<type>d:text</type>
|
||||||
<multiple>true</multiple>
|
<multiple>true</multiple>
|
||||||
</property>
|
</property>
|
||||||
|
<property name="cm:subjectline">
|
||||||
|
<title>Subject</title>
|
||||||
|
<type>d:text</type>
|
||||||
|
</property>
|
||||||
<property name="cm:sentdate">
|
<property name="cm:sentdate">
|
||||||
<title>Sent Date</title>
|
<title>Sent Date</title>
|
||||||
<type>d:datetime</type>
|
<type>d:datetime</type>
|
||||||
|
@@ -45,20 +45,19 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||||
"message/rfc822"};
|
"message/rfc822"};
|
||||||
|
|
||||||
private static final String SUBSTG_MESSAGEBODY = "__substg1.0_1000001E";
|
private static final String STREAM_PREFIX = "__substg1.0_";
|
||||||
private static final String SUBSTG_RECIPIENTEMAIL = "__substg1.0_39FE001E";
|
private static final int STREAM_PREFIX_LENGTH = STREAM_PREFIX.length();
|
||||||
private static final String SUBSTG_RECEIVEDEMAIL = "__substg1.0_0076001E";
|
|
||||||
private static final String SUBSTG_SENDEREMAIL = "__substg1.0_0C1F001E";
|
|
||||||
private static final String SUBSTG_DATE = "__substg1.0_00470102";
|
|
||||||
|
|
||||||
private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed");
|
private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed");
|
||||||
private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate");
|
private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate");
|
||||||
private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator");
|
private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator");
|
||||||
private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee");
|
private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee");
|
||||||
private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees");
|
private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees");
|
||||||
|
private static final QName PROP_SUBJECT = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "subjectline");
|
||||||
|
|
||||||
// the CC: email addresses
|
// the CC: email addresses
|
||||||
private ThreadLocal<List<String>> receipientEmails = new ThreadLocal<List<String>>();
|
private ThreadLocal<List<String>> receipientEmails = new ThreadLocal<List<String>>();
|
||||||
|
//private StringBuilder debug = null;
|
||||||
|
|
||||||
public MailMetadataExtracter()
|
public MailMetadataExtracter()
|
||||||
{
|
{
|
||||||
@@ -73,48 +72,12 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
String name = event.getName();
|
//String name = event.getName();
|
||||||
|
//String path = event.getPath().toString();
|
||||||
|
//debug.append(path).append(" - ").append(name).append(" (").append(event.getStream().available()).append(")\r\n");
|
||||||
|
|
||||||
if (name.equals(SUBSTG_RECIPIENTEMAIL)) // a recipient email address
|
StreamHandler handler = new StreamHandler(event.getName(), event.getStream());
|
||||||
{
|
handler.process(destination);
|
||||||
String emailAddress = readPlainTextStream(event.getStream());
|
|
||||||
receipientEmails.get().add(convertExchangeAddress(emailAddress));
|
|
||||||
}
|
|
||||||
else if (name.equals(SUBSTG_RECEIVEDEMAIL)) // receiver email address
|
|
||||||
{
|
|
||||||
String emailAddress = readPlainTextStream(event.getStream());
|
|
||||||
destination.put(PROP_ADDRESSEE, convertExchangeAddress(emailAddress));
|
|
||||||
}
|
|
||||||
else if (name.equals(SUBSTG_SENDEREMAIL)) // sender email - NOTE either email OR full Exchange data e.g. : /O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=MIKE.FARMAN@BEN
|
|
||||||
{
|
|
||||||
String emailAddress = readPlainTextStream(event.getStream());
|
|
||||||
destination.put(PROP_ORIGINATOR, convertExchangeAddress(emailAddress));
|
|
||||||
}
|
|
||||||
else if (name.equals(SUBSTG_DATE))
|
|
||||||
{
|
|
||||||
// the date is not really plain text - but it's easier to parse as such
|
|
||||||
String date = readPlainTextStream(event.getStream());
|
|
||||||
int valueIndex = date.indexOf("l=");
|
|
||||||
if (valueIndex != -1)
|
|
||||||
{
|
|
||||||
int dateIndex = date.indexOf('-', valueIndex);
|
|
||||||
if (dateIndex != -1)
|
|
||||||
{
|
|
||||||
dateIndex++;
|
|
||||||
String strYear = date.substring(dateIndex, dateIndex + 2);
|
|
||||||
int year = Integer.parseInt(strYear) + (2000 - 1900);
|
|
||||||
String strMonth = date.substring(dateIndex + 2, dateIndex + 4);
|
|
||||||
int month = Integer.parseInt(strMonth) - 1;
|
|
||||||
String strDay = date.substring(dateIndex + 4, dateIndex + 6);
|
|
||||||
int day = Integer.parseInt(strDay);
|
|
||||||
String strHour = date.substring(dateIndex + 6, dateIndex + 8);
|
|
||||||
int hour = Integer.parseInt(strHour);
|
|
||||||
String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
|
|
||||||
int minute = Integer.parseInt(strMinute);
|
|
||||||
destination.put(PROP_SENTDATE, new Date(year, month, day, hour, minute));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
@@ -126,6 +89,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
//debug = new StringBuilder(1024);
|
||||||
this.receipientEmails.set(new ArrayList<String>());
|
this.receipientEmails.set(new ArrayList<String>());
|
||||||
|
|
||||||
is = reader.getContentInputStream();
|
is = reader.getContentInputStream();
|
||||||
@@ -147,6 +111,8 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
{
|
{
|
||||||
destination.put(PROP_ADDRESSEES, (Serializable)receipientEmails.get());
|
destination.put(PROP_ADDRESSEES, (Serializable)receipientEmails.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//logger.warn(debug);
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
@@ -157,14 +123,6 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String readPlainTextStream(DocumentInputStream stream)
|
|
||||||
throws IOException
|
|
||||||
{
|
|
||||||
byte[] data = new byte[stream.available()];
|
|
||||||
int read = stream.read(data);
|
|
||||||
return new String(data);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String convertExchangeAddress(String email)
|
private static String convertExchangeAddress(String email)
|
||||||
{
|
{
|
||||||
if (email.lastIndexOf("/CN=") == -1)
|
if (email.lastIndexOf("/CN=") == -1)
|
||||||
@@ -177,4 +135,94 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
|
|||||||
return email.substring(email.lastIndexOf("/CN=") + 4);
|
return email.substring(email.lastIndexOf("/CN=") + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final String ENCODING_TEXT = "001E";
|
||||||
|
private static final String ENCODING_BINARY = "0102";
|
||||||
|
private static final String ENCODING_UNICODE = "001F";
|
||||||
|
private static final String SUBSTG_MESSAGEBODY = "1000";
|
||||||
|
private static final String SUBSTG_RECIPIENTEMAIL = "39FE";
|
||||||
|
private static final String SUBSTG_RECEIVEDEMAIL = "0076";
|
||||||
|
private static final String SUBSTG_SENDEREMAIL = "0C1F";
|
||||||
|
private static final String SUBSTG_DATE = "0047";
|
||||||
|
private static final String SUBSTG_SUBJECT = "0037";
|
||||||
|
|
||||||
|
private class StreamHandler
|
||||||
|
{
|
||||||
|
StreamHandler(String name, DocumentInputStream stream)
|
||||||
|
{
|
||||||
|
this.type = name.substring(STREAM_PREFIX_LENGTH, STREAM_PREFIX_LENGTH + 4);
|
||||||
|
this.encoding = name.substring(STREAM_PREFIX_LENGTH + 4, STREAM_PREFIX_LENGTH + 8);
|
||||||
|
this.stream = stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(final Map<QName, Serializable> destination)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
if (type.equals(SUBSTG_SENDEREMAIL))
|
||||||
|
{
|
||||||
|
destination.put(PROP_ORIGINATOR, convertExchangeAddress(extractText()));
|
||||||
|
}
|
||||||
|
else if (type.equals(SUBSTG_RECIPIENTEMAIL))
|
||||||
|
{
|
||||||
|
receipientEmails.get().add(convertExchangeAddress(extractText()));
|
||||||
|
}
|
||||||
|
else if (type.equals(SUBSTG_RECEIVEDEMAIL))
|
||||||
|
{
|
||||||
|
destination.put(PROP_ADDRESSEE, convertExchangeAddress(extractText()));
|
||||||
|
}
|
||||||
|
else if (type.equals(SUBSTG_SUBJECT))
|
||||||
|
{
|
||||||
|
destination.put(PROP_SUBJECT, extractText());
|
||||||
|
}
|
||||||
|
else if (type.equals(SUBSTG_DATE))
|
||||||
|
{
|
||||||
|
// the date is not really plain text - but it's easier to parse as such
|
||||||
|
String date = extractText();
|
||||||
|
int valueIndex = date.indexOf("l=");
|
||||||
|
if (valueIndex != -1)
|
||||||
|
{
|
||||||
|
int dateIndex = date.indexOf('-', valueIndex);
|
||||||
|
if (dateIndex != -1)
|
||||||
|
{
|
||||||
|
dateIndex++;
|
||||||
|
String strYear = date.substring(dateIndex, dateIndex + 2);
|
||||||
|
int year = Integer.parseInt(strYear) + (2000 - 1900);
|
||||||
|
String strMonth = date.substring(dateIndex + 2, dateIndex + 4);
|
||||||
|
int month = Integer.parseInt(strMonth) - 1;
|
||||||
|
String strDay = date.substring(dateIndex + 4, dateIndex + 6);
|
||||||
|
int day = Integer.parseInt(strDay);
|
||||||
|
String strHour = date.substring(dateIndex + 6, dateIndex + 8);
|
||||||
|
int hour = Integer.parseInt(strHour);
|
||||||
|
String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
|
||||||
|
int minute = Integer.parseInt(strMinute);
|
||||||
|
destination.put(PROP_SENTDATE, new Date(year, month, day, hour, minute));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractText()
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
byte[] data = new byte[stream.available()];
|
||||||
|
stream.read(data);
|
||||||
|
if (this.encoding.equals(ENCODING_TEXT) || this.encoding.equals(ENCODING_BINARY))
|
||||||
|
{
|
||||||
|
return new String(data);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
byte[] b = new byte[data.length >> 1];
|
||||||
|
for (int i=0; i<b.length; i++)
|
||||||
|
{
|
||||||
|
b[i] = data[i << 1];
|
||||||
|
}
|
||||||
|
return new String(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String type;
|
||||||
|
private String encoding;
|
||||||
|
private DocumentInputStream stream;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user