Merged V3.2 to HEAD

16971: Merged V3.1 to V3.2
        14282: (RECORD ONLY) Updated version to 3.1.1
        14565: (RECORD ONLY) Updated version to include revision number (x.y.z)
        14848: (RECORD ONLY) Updated version number
        15029: (RECORD ONLY) ETHREEOH-441 and ETHREEOH-1862 Exceptions in server when certain PDFs are uploaded.
               - Probably done already without merge info
               - Renamed source zip to help tracking
        15985: ETHREEOH-2292: Deployment failure in case of IPv6 on Win 2k8
        16164: Fixed ETHREEOH-2690: JGroups TCP doesn't bind to specified address
        16240: (RECORD ONLY) Fix typos in installer
        16726: Fix ETHREEOH-2677 - user usages (when taking ownership)
        16745: Fix ETHREEOH-2991 - Deployment exception, unable to deploy - when deploying a manual snapshot with a stale file
        16771: (RECORD ONLY) Fix to ETHREEOH-441
               - Probably merged without merge info
        16822: Merged DEV/BELARUS/V3.1 to V3.1
           16753: ETHREEOH-1951: when versionable aspect is active, using the Microsoft Word option ...
        16825: Fixed ETHREEOH-803: Incorrect mimetype is displayed for .pps and .pot files
        16862: Fix for ETHREEOH-801 Fail to extract some kind of PDF file metadata
               - Resolved merge by preferring merged-in fix
        16880: Merged V2.2 to V3.1
           13966: (record only) Updated to use ALF-BINARIES version of installjammer
           14340: (record only) Fix so deployment installers build
           14719: (record only) Tweaks to AMP
           15153: (record only) Fix ETWOTWO-1264 - PHP integration
           15287: Fixed ETWOTWO-989: MS Sql server upgrade from 2.1.6 failed
           15351: ETWOTWO-1345 (script not matching patch ID)
        16928: Fixed shutdown: Task threads are now daemon threads
        
    16986: Merged V3.1 to V3.2
        16932: Moved Lucene ResultSet prefetch code to use NodeBulkLoader (backed by common code in Node DAO)
        16945: (RECORD ONLY) Merged V3.2 to V3.1
           16931: Fixed build unit test path for recent Chiba lib change
        16957: Removed Hibernate event listener after 3.2.3 CGLib fixes
        16959: Applied fix for ETHREEOH-2121: ContentUtils.getContentAsString does not pass JSESSIONID into the request
        16961: (RECORD ONLY) Updated db settings in configs
        16964: (RECORD ONLY) Merged V3.2 to V3.1
           16308: ETHREEOH-2833: The Content rule with 'Items with specific text value in property' condition can't be created.
        16968: Fixed ETHREEOH-2120: Recently Modified Documents Dashlet failed to load after a big upload
        16983: Build fix: avoid queries for parent assocs if no nodes were found during child node in caching
    
    16991: Merge V3.1 to V3.2
         15136: (RECORD ONLY)  : changes have already been merged.
             MERGE 2.2 to 3.1
                 14985 - ETWOTWO-1174 - Preview of protected PDFs results in an error
                 14305 - ETWOTWO-951 - contribution
                 14601 - ETWOTWO-1236 - Make FSR deployment case sensitive.
        
    17022: MT - fix fallout from r16924 - add inbound collection support to MT node service interceptor (caught by MultiTDemoTest.testCreateGroups)
    
    17023: Fixed parentAssocCache bug when adding assocs against an empty cache


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@17025 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jan Vonka
2009-10-19 14:07:57 +00:00
parent 733d27742b
commit de87ed5ffa
21 changed files with 519 additions and 339 deletions

View File

@@ -27,13 +27,17 @@ package org.alfresco.repo.content.metadata;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
@@ -51,6 +55,8 @@ import org.apache.pdfbox.pdmodel.PDDocumentInformation;
*/
public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
{
protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
private static final String KEY_AUTHOR = "author";
private static final String KEY_TITLE = "title";
private static final String KEY_SUBJECT = "subject";
@@ -92,10 +98,32 @@ public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
putRawValue(KEY_CREATED, created.getTime(), rawProperties);
}
}
catch (IOException e)
catch (IOException iox)
{
// This sometimes fails because the date is a string: ETHREEOH-1936
}
// Alfresco bug ETHREEOH-801 refers to a bug in PDFBox (http://issues.apache.org/jira/browse/PDFBOX-145)
// where the above call to docInfo.getCreationDate() throws an IOException for some PDFs.
//
// The code below is a workaround for that issue.
// This creationDate has format: D:20080429+01'00'
String creationDate = docInfo.getCustomMetadataValue("CreationDate");
if (pdfLogger.isWarnEnabled())
{
pdfLogger.warn("IOException caught when extracting metadata from pdf file.");
pdfLogger.warn("This may be caused by a PDFBox bug that can often be worked around. The stack trace below is provided for information purposes only.");
pdfLogger.warn("", iox);
}
final SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
if (creationDate != null && creationDate.length() > 10) // 10 allows for "D:yyyyMMdd"
{
String dateWithoutLeadingDColon = creationDate.substring(2);
Date parsedDate = sdf.parse(dateWithoutLeadingDColon);
putRawValue(KEY_CREATED, parsedDate, rawProperties);
}
}
}
}
finally