mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Merged V3.2 to HEAD
16971: Merged V3.1 to V3.2 14282: (RECORD ONLY) Updated version to 3.1.1 14565: (RECORD ONLY) Updated version to include revision number (x.y.z) 14848: (RECORD ONLY) Updated version number 15029: (RECORD ONLY) ETHREEOH-441 and ETHREEOH-1862 Exceptions in server when certain PDFs are uploaded. - Probably done already without merge info - Renamed source zip to help tracking 15985: ETHREEOH-2292: Deployment failure in case of IPv6 on Win 2k8 16164: Fixed ETHREEOH-2690: JGroups TCP doesn't bind to specified address 16240: (RECORD ONLY) Fix typos in installer 16726: Fix ETHREEOH-2677 - user usages (when taking ownership) 16745: Fix ETHREEOH-2991 - Deployment exception, unable to deploy - when deploying a manual snapshot with a stale file 16771: (RECORD ONLY) Fix to ETHREEOH-441 - Probably merged without merge info 16822: Merged DEV/BELARUS/V3.1 to V3.1 16753: ETHREEOH-1951: when versionable aspect is active, using the Microsoft Word option ... 16825: Fixed ETHREEOH-803: Incorrect mimetype is displayed for .pps and .pot files 16862: Fix for ETHREEOH-801 Fail to extract some kind of PDF file metadata - Resolved merge by preferring merged-in fix 16880: Merged V2.2 to V3.1 13966: (record only) Updated to use ALF-BINARIES version of installjammer 14340: (record only) Fix so deployment installers build 14719: (record only) Tweaks to AMP 15153: (record only) Fix ETWOTWO-1264 - PHP integration 15287: Fixed ETWOTWO-989: MS Sql server upgrade from 2.1.6 failed 15351: ETWOTWO-1345 (script not matching patch ID) 16928: Fixed shutdown: Task threads are now daemon threads 16986: Merged V3.1 to V3.2 16932: Moved Lucene ResultSet prefetch code to use NodeBulkLoader (backed by common code in Node DAO) 16945: (RECORD ONLY) Merged V3.2 to V3.1 16931: Fixed build unit test path for recent Chiba lib change 16957: Removed Hibernate event listener after 3.2.3 CGLib fixes 16959: Applied fix for ETHREEOH-2121: ContentUtils.getContentAsString does not pass JSESSIONID into the request 16961: 
(RECORD ONLY) Updated db settings in configs 16964: (RECORD ONLY) Merged V3.2 to V3.1 16308: ETHREEOH-2833: The Content rule with 'Items with specific text value in property' condition can't be created. 16968: Fixed ETHREEOH-2120: Recently Modified Documents Dashlet failed to load after a big upload 16983: Build fix: avoid queries for parent assocs if no nodes were found during child node in caching 16991: Merge V3.1 to V3.2 15136: (RECORD ONLY) : changes have already been merged. MERGE 2.2 to 3.1 14985 - ETWOTWO-1174 - Preview of protected PDFs results in an error 14305 - ETWOTWO-951 - contribution 14601 - ETWOTWO-1236 - Make FSR deployment case sensitive. 17022: MT - fix fallout from r16924 - add inbound collection support to MT node service interceptor (caught by MultiTDemoTest.testCreateGroups) 17023: Fixed parentAssocCache bug when adding assocs against an empty cache git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@17025 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -27,13 +27,17 @@ package org.alfresco.repo.content.metadata;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
|
||||
@@ -51,6 +55,8 @@ import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
*/
|
||||
public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
|
||||
|
||||
private static final String KEY_AUTHOR = "author";
|
||||
private static final String KEY_TITLE = "title";
|
||||
private static final String KEY_SUBJECT = "subject";
|
||||
@@ -92,10 +98,32 @@ public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
putRawValue(KEY_CREATED, created.getTime(), rawProperties);
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
catch (IOException iox)
|
||||
{
|
||||
// This sometimes fails because the date is a string: ETHREEOH-1936
|
||||
}
|
||||
// Alfresco bug ETHREEOH-801 refers to a bug in PDFBox (http://issues.apache.org/jira/browse/PDFBOX-145)
|
||||
// where the above call to docInfo.getCreationDate() throws an IOException for some PDFs.
|
||||
//
|
||||
// The code below is a workaround for that issue.
|
||||
|
||||
// This creationDate has format: D:20080429+01'00'
|
||||
String creationDate = docInfo.getCustomMetadataValue("CreationDate");
|
||||
|
||||
if (pdfLogger.isWarnEnabled())
|
||||
{
|
||||
pdfLogger.warn("IOException caught when extracting metadata from pdf file.");
|
||||
pdfLogger.warn("This may be caused by a PDFBox bug that can often be worked around. The stack trace below is provided for information purposes only.");
|
||||
pdfLogger.warn("", iox);
|
||||
}
|
||||
|
||||
final SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
|
||||
if (creationDate != null && creationDate.length() > 10) // 10 allows for "D:yyyyMMdd"
|
||||
{
|
||||
String dateWithoutLeadingDColon = creationDate.substring(2);
|
||||
Date parsedDate = sdf.parse(dateWithoutLeadingDColon);
|
||||
putRawValue(KEY_CREATED, parsedDate, rawProperties);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
|
Reference in New Issue
Block a user