mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Add metadata extractor support for .dwg files (ALF-2262)
The code for extracting .dwg files has been contributed to Apache Tika, and the Alfresco metadata extractor deep-calls into Tika to do the work. We retain our own tests of it, however. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@19927 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -217,6 +217,7 @@
|
||||
<!-- Unsupported experimental extractor commented out -->
|
||||
<!-- <bean id="extracter.MP3" class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" /> -->
|
||||
<bean id="extracter.OpenDocument" class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.DWG" class="org.alfresco.repo.content.metadata.DWGMetadataExtracter" parent="baseMetadataExtracter" />
|
||||
<bean id="extracter.RFC822" class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter" parent="baseMetadataExtracter" >
|
||||
<property name="supportedDateFormats">
|
||||
<list>
|
||||
|
@@ -356,6 +356,9 @@
|
||||
<extension display="JAR">jar</extension>
|
||||
<extension display="EAR">ear</extension>
|
||||
</mimetype>
|
||||
<mimetype mimetype="application/dwg" display="AutoCAD Drawing">
|
||||
<extension>dwg</extension>
|
||||
</mimetype>
|
||||
<mimetype mimetype="image/x-dwg" display="AutoCAD Drawing">
|
||||
<extension>dwg</extension>
|
||||
</mimetype>
|
||||
|
@@ -18,6 +18,7 @@
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
|
||||
import org.alfresco.repo.content.metadata.DWGMetadataExtracterTest;
|
||||
import org.alfresco.repo.content.metadata.HtmlMetadataExtracterTest;
|
||||
import org.alfresco.repo.content.metadata.MP3MetadataExtracterTest;
|
||||
import org.alfresco.repo.content.metadata.MailMetadataExtracterTest;
|
||||
@@ -80,6 +81,7 @@ public class ContentMinimalContextTestSuite extends TestSuite
|
||||
TestSuite suite = new TestSuite();
|
||||
|
||||
// Metadata tests
|
||||
suite.addTestSuite( DWGMetadataExtracterTest.class );
|
||||
suite.addTestSuite( HtmlMetadataExtracterTest.class );
|
||||
suite.addTestSuite( MailMetadataExtracterTest.class );
|
||||
suite.addTestSuite( MP3MetadataExtracterTest.class );
|
||||
|
@@ -62,6 +62,8 @@ public class MimetypeMap implements MimetypeService
|
||||
public static final String MIMETYPE_EXCEL = "application/vnd.excel";
|
||||
public static final String MIMETYPE_BINARY = "application/octet-stream";
|
||||
public static final String MIMETYPE_PPT = "application/vnd.powerpoint";
|
||||
public static final String MIMETYPE_APP_DWG = "application/dwg";
|
||||
public static final String MIMETYPE_IMG_DWG = "image/x-dwg";
|
||||
public static final String MIMETYPE_FLASH = "application/x-shockwave-flash";
|
||||
public static final String MIMETYPE_IMAGE_GIF = "image/gif";
|
||||
public static final String MIMETYPE_IMAGE_JPEG = "image/jpeg";
|
||||
|
@@ -122,8 +122,6 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
|
||||
|
||||
protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
|
||||
{
|
||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||
|
||||
// get the extension for the mimetype
|
||||
String ext = mimetypeMap.getExtension(mimetype);
|
||||
|
||||
@@ -133,7 +131,12 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
|
||||
{
|
||||
throw new FileNotFoundException("No quick." + ext + " file found for test");
|
||||
}
|
||||
return extractFromFile(sourceFile, mimetype);
|
||||
}
|
||||
|
||||
protected Map<QName, Serializable> extractFromFile(File sourceFile, String mimetype) throws Exception
|
||||
{
|
||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||
// construct a reader onto the source file
|
||||
ContentReader sourceReader = new FileContentReader(sourceFile);
|
||||
sourceReader.setMimetype(mimetype);
|
||||
|
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Antti Jokipii
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.dwg.DWGParser;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.xml.sax.ContentHandler;
|
||||
|
||||
|
||||
/**
|
||||
* Metadata extractor for the
|
||||
* {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_DWG MIMETYPE_DWG}
|
||||
* mimetype.
|
||||
* <pre>
|
||||
* <b>title:</b> -- cm:title
|
||||
* <b>description:</b> -- cm:description
|
||||
* <b>author:</b> -- cm:author
|
||||
* <b>keywords:</b>
|
||||
* <b>comments:</b>
|
||||
* <b>lastauthor:</b>
|
||||
* </pre>
|
||||
*
|
||||
* TIKA Note - this has been converted to deep-call into Tika.
|
||||
* This will be replaced with proper calls to Tika at a later date.
|
||||
* Everything except some Print info has been ported to Tika.
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class DWGMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
private static final String KEY_AUTHOR = "author";
|
||||
private static final String KEY_COMMENT = "comment";
|
||||
private static final String KEY_DESCRIPTION = "description";
|
||||
private static final String KEY_KEYWORD = "keyword";
|
||||
private static final String KEY_LAST_AUTHOR = "lastAuthor";
|
||||
private static final String KEY_TITLE = "title";
|
||||
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||
MimetypeMap.MIMETYPE_APP_DWG,
|
||||
MimetypeMap.MIMETYPE_IMG_DWG,
|
||||
};
|
||||
|
||||
public DWGMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
Map<String, Serializable> rawProperties = newRawMap();
|
||||
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
|
||||
DWGParser dwgParser = new DWGParser();
|
||||
ContentHandler handler = new BodyContentHandler() ;
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = new ParseContext();
|
||||
|
||||
dwgParser.parse(is, handler, metadata, context);
|
||||
|
||||
putRawValue(KEY_AUTHOR, metadata.get(Metadata.AUTHOR), rawProperties);
|
||||
putRawValue(KEY_COMMENT, metadata.get(Metadata.COMMENTS), rawProperties);
|
||||
putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), rawProperties);
|
||||
putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), rawProperties);
|
||||
putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), rawProperties);
|
||||
putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), rawProperties);
|
||||
putRawValue(KEY_TITLE, metadata.get(Metadata.TITLE), rawProperties);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
// Done
|
||||
return rawProperties;
|
||||
}
|
||||
}
|
@@ -0,0 +1,12 @@
|
||||
#
|
||||
# DWGMetadataExtracter - default mapping
|
||||
#
|
||||
# author: Nick Burch
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.net.URL;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
|
||||
/**
|
||||
* @see DWGMetadataExtracter
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private DWGMetadataExtracter extracter;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new DWGMetadataExtracter();
|
||||
extracter.setDictionaryService(dictionaryService);
|
||||
extracter.register();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
public void testSupports() throws Exception
|
||||
{
|
||||
for (String mimetype : DWGMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||
{
|
||||
boolean supports = extracter.isSupported(mimetype);
|
||||
assertTrue("Mimetype should be supported: " + mimetype, supports);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test all the supported files.
|
||||
* Note - doesn't use extractFromMimetype
|
||||
*/
|
||||
public void testSupportedMimetypes() throws Exception
|
||||
{
|
||||
String mimetype = MimetypeMap.MIMETYPE_APP_DWG;
|
||||
|
||||
for (String version : new String[] {"2004","2007","2010"})
|
||||
{
|
||||
String filename = "quick" + version + ".dwg";
|
||||
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
|
||||
File file = new File(url.getFile());
|
||||
|
||||
Map<QName, Serializable> properties = extractFromFile(file, mimetype);
|
||||
|
||||
// check we got something
|
||||
assertFalse("extractFromMimetype should return at least some properties, none found for " + mimetype,
|
||||
properties.isEmpty());
|
||||
|
||||
// check common metadata
|
||||
testCommonMetadata(mimetype, properties);
|
||||
// check file-type specific metadata
|
||||
testFileSpecificMetadata(mimetype, properties);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean skipAuthorCheck(String mimetype) { return true; }
|
||||
|
||||
/**
|
||||
* We also provide the creation date - check that
|
||||
*/
|
||||
protected void testFileSpecificMetadata(String mimetype,
|
||||
Map<QName, Serializable> properties) {
|
||||
// Check for extra fields
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,
|
||||
"Nevin Nollop",
|
||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR)));
|
||||
}
|
||||
|
||||
}
|
BIN
source/test-resources/quick/quick2004.dwg
Normal file
BIN
source/test-resources/quick/quick2004.dwg
Normal file
Binary file not shown.
BIN
source/test-resources/quick/quick2007.dwg
Normal file
BIN
source/test-resources/quick/quick2007.dwg
Normal file
Binary file not shown.
BIN
source/test-resources/quick/quick2010.dwg
Normal file
BIN
source/test-resources/quick/quick2010.dwg
Normal file
Binary file not shown.
Reference in New Issue
Block a user