mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-06-02 17:35:18 +00:00
- expects .msg files in native Outlook format - uses the POI library for the parsing of the horrid OLE2 compound document format - extracts addressee(s), sent date and originator email address - for the future: could be modified and used as a transformer to allow full-text indexing of Outlook format emails. Adds new aspect "emailed" to the content model to support properties for the above extractor. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@3387 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
44 lines
1.1 KiB
Java
44 lines
1.1 KiB
Java
package org.alfresco.repo.content.metadata;
|
|
|
|
import org.alfresco.repo.content.MimetypeMap;
|
|
|
|
/**
|
|
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
|
|
*
|
|
* @author Jesper Steen Møller
|
|
*/
|
|
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|
{
|
|
private MetadataExtracter extracter;
|
|
|
|
@Override
|
|
public void setUp() throws Exception
|
|
{
|
|
super.setUp();
|
|
extracter = new PdfBoxMetadataExtracter();
|
|
}
|
|
|
|
/**
|
|
* @return Returns the same transformer regardless - it is allowed
|
|
*/
|
|
protected MetadataExtracter getExtracter()
|
|
{
|
|
return extracter;
|
|
}
|
|
|
|
public void testReliability() throws Exception
|
|
{
|
|
double reliability = 0.0;
|
|
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
|
assertEquals("Mimetype should not be supported", 0.0, reliability);
|
|
|
|
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
|
|
assertEquals("Mimetype should be supported", 1.0, reliability);
|
|
}
|
|
|
|
public void testPdfExtraction() throws Exception
|
|
{
|
|
testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
|
|
}
|
|
}
|