Update metadata extractors - Outlook, MP3, Mail and PDF improvements, and increase test coverage

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@18454 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-02-04 14:42:45 +00:00
parent f2554d0f63
commit bd1e3edf76
19 changed files with 707 additions and 225 deletions

View File

@@ -50,6 +50,9 @@ import org.apache.pdfbox.pdmodel.PDDocumentInformation;
* <b>created:</b> -- cm:created
* </pre>
*
* Tika note: all of these fields (plus a few others) are also
* present in the Tika metadata.
*
* @author Jesper Steen Møller
* @author Derek Hulley
*/
@@ -95,6 +98,10 @@ public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
Calendar created = docInfo.getCreationDate();
if (created != null)
{
// Work around https://issues.apache.org/jira/browse/PDFBOX-598 by zeroing the milliseconds field
created.set(Calendar.MILLISECOND, 0);
// Save
putRawValue(KEY_CREATED, created.getTime(), rawProperties);
}
}