mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Update the MP3 extractor to output audio keys (related to ALF-6170), and refactor the audio extractors to share more common code. Also expand the audio extractor tests to share common code and to test more metadata. (Needed for devcon demo)
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@31013 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -224,7 +224,6 @@
|
|||||||
<bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean id="extracter.Mail" class="org.alfresco.repo.content.metadata.MailMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean id="extracter.Mail" class="org.alfresco.repo.content.metadata.MailMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean id="extracter.Html" class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean id="extracter.Html" class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean id="extracter.MP3" class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
|
|
||||||
<bean id="extracter.OpenDocument" class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean id="extracter.OpenDocument" class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean id="extracter.DWG" class="org.alfresco.repo.content.metadata.DWGMetadataExtracter" parent="baseMetadataExtracter" />
|
<bean id="extracter.DWG" class="org.alfresco.repo.content.metadata.DWGMetadataExtracter" parent="baseMetadataExtracter" />
|
||||||
<bean id="extracter.RFC822" class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter" parent="baseMetadataExtracter" >
|
<bean id="extracter.RFC822" class="org.alfresco.repo.content.metadata.RFC822MetadataExtracter" parent="baseMetadataExtracter" >
|
||||||
@@ -235,6 +234,12 @@
|
|||||||
</list>
|
</list>
|
||||||
</property>
|
</property>
|
||||||
</bean>
|
</bean>
|
||||||
|
<bean id="extracter.MP3" class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter">
|
||||||
|
<property name="tikaConfig" ref="tikaConfig"/>
|
||||||
|
</bean>
|
||||||
|
<bean id="extracter.Audio" class="org.alfresco.repo.content.metadata.TikaAudioMetadataExtracter" parent="baseMetadataExtracter">
|
||||||
|
<property name="tikaConfig" ref="tikaConfig"/>
|
||||||
|
</bean>
|
||||||
<bean id="extracter.OpenOffice" class="org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracter" parent="baseMetadataExtracter">
|
<bean id="extracter.OpenOffice" class="org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracter" parent="baseMetadataExtracter">
|
||||||
<property name="worker">
|
<property name="worker">
|
||||||
<ref bean="extracter.worker.OpenOffice" />
|
<ref bean="extracter.worker.OpenOffice" />
|
||||||
|
@@ -662,7 +662,16 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
// Ask Tika to detect the document, and report back on whether
|
// Ask Tika to detect the document, and report back on whether
|
||||||
// the current mime type is plausible
|
// the current mime type is plausible
|
||||||
String typeErrorMessage = null;
|
String typeErrorMessage = null;
|
||||||
String differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader());
|
String differentType = null;
|
||||||
|
if(mimetypeService != null)
|
||||||
|
{
|
||||||
|
differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
logger.info("Unable to verify mimetype of " + reader.getReader() +
|
||||||
|
" as no MimetypeService available to " + getClass().getName());
|
||||||
|
}
|
||||||
if(differentType != null)
|
if(differentType != null)
|
||||||
{
|
{
|
||||||
typeErrorMessage = "\n" +
|
typeErrorMessage = "\n" +
|
||||||
|
@@ -31,38 +31,35 @@ import org.apache.tika.parser.mp3.Mp3Parser;
|
|||||||
/**
|
/**
|
||||||
* Extracts the following values from MP3 files:
|
* Extracts the following values from MP3 files:
|
||||||
* <pre>
|
* <pre>
|
||||||
* <b>songTitle:</b> -- {music}songTitle, cm:title
|
* <b>songTitle:</b> -- cm:title
|
||||||
* <b>albumTitle:</b> -- {music}albumTitle
|
* <b>albumTitle:</b> -- audio:album
|
||||||
* <b>artist:</b> -- {music}artist, cm:author
|
* <b>artist:</b> -- audio:artist, cm:author
|
||||||
* <b>description:</b> -- cm:description
|
* <b>description:</b> -- cm:description
|
||||||
* <b>comment:</b> -- {music}comment
|
* <b>comment:</b> --
|
||||||
* <b>yearReleased:</b> -- {music}yearReleased
|
* <b>yearReleased:</b> -- audio:releaseDate
|
||||||
* <b>trackNumber:</b> -- {music}trackNumber
|
* <b>trackNumber:</b> -- audio:trackNumber
|
||||||
* <b>genre:</b> -- {music}genre
|
* <b>genre:</b> -- audio:genre
|
||||||
* <b>composer:</b> -- {music}composer
|
* <b>composer:</b> -- audio:composer
|
||||||
* <b>lyrics:</b> -- {music}lyrics
|
* <b>lyrics:</b> --
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* TODO Get hold of a mp3 file with some lyrics in it, so we
|
* Note - XMPDM metadata keys are also emitted, in common with
|
||||||
* can contribute the patch to Tika
|
* the other Tika powered extracters
|
||||||
*
|
*
|
||||||
* Uses Apache Tika
|
* Uses Apache Tika
|
||||||
*
|
*
|
||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
* @author Roy Wetherall
|
|
||||||
*/
|
*/
|
||||||
public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
public class MP3MetadataExtracter extends TikaAudioMetadataExtracter
|
||||||
{
|
{
|
||||||
private static final String KEY_SONG_TITLE = "songTitle";
|
private static final String KEY_SONG_TITLE = "songTitle";
|
||||||
private static final String KEY_ALBUM_TITLE = "albumTitle";
|
private static final String KEY_ALBUM_TITLE = "albumTitle";
|
||||||
private static final String KEY_ARTIST = "artist";
|
private static final String KEY_ARTIST = "artist";
|
||||||
private static final String KEY_DESCRIPTION = "description";
|
|
||||||
private static final String KEY_COMMENT = "comment";
|
private static final String KEY_COMMENT = "comment";
|
||||||
private static final String KEY_YEAR_RELEASED = "yearReleased";
|
private static final String KEY_YEAR_RELEASED = "yearReleased";
|
||||||
private static final String KEY_TRACK_NUMBER = "trackNumber";
|
private static final String KEY_TRACK_NUMBER = "trackNumber";
|
||||||
private static final String KEY_GENRE = "genre";
|
private static final String KEY_GENRE = "genre";
|
||||||
private static final String KEY_COMPOSER = "composer";
|
private static final String KEY_COMPOSER = "composer";
|
||||||
private static final String KEY_LYRICS = "lyrics";
|
|
||||||
|
|
||||||
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||||
new String[] { MimetypeMap.MIMETYPE_MP3 },
|
new String[] { MimetypeMap.MIMETYPE_MP3 },
|
||||||
@@ -82,6 +79,12 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
@Override
|
@Override
|
||||||
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
Map<String, Serializable> properties, Map<String,String> headers) {
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
|
// Do the normal Audio mappings
|
||||||
|
super.extractSpecific(metadata, properties, headers);
|
||||||
|
|
||||||
|
// Now do the compatibility ones
|
||||||
|
// We only need these for people who had pre-existing mapping
|
||||||
|
// properties from before the proper audio model was added
|
||||||
putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties);
|
putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties);
|
||||||
putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties);
|
putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties);
|
||||||
putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties);
|
putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties);
|
||||||
@@ -90,41 +93,8 @@ public class MP3MetadataExtracter extends TikaPoweredMetadataExtracter
|
|||||||
putRawValue(KEY_GENRE, metadata.get(XMPDM.GENRE), properties);
|
putRawValue(KEY_GENRE, metadata.get(XMPDM.GENRE), properties);
|
||||||
putRawValue(KEY_YEAR_RELEASED, metadata.get(XMPDM.RELEASE_DATE), properties);
|
putRawValue(KEY_YEAR_RELEASED, metadata.get(XMPDM.RELEASE_DATE), properties);
|
||||||
putRawValue(KEY_COMPOSER, metadata.get(XMPDM.COMPOSER), properties);
|
putRawValue(KEY_COMPOSER, metadata.get(XMPDM.COMPOSER), properties);
|
||||||
// TODO lyrics
|
|
||||||
//putRawValue(KEY_LYRICS, getLyrics(), properties);
|
|
||||||
|
|
||||||
putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties);
|
|
||||||
|
|
||||||
|
// All done
|
||||||
return properties;
|
return properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate the description
|
|
||||||
*
|
|
||||||
* @param metadata the Tika metadata extracted from the file
|
|
||||||
* @return the description
|
|
||||||
*/
|
|
||||||
private String generateDescription(Metadata metadata)
|
|
||||||
{
|
|
||||||
StringBuilder result = new StringBuilder();
|
|
||||||
if (metadata.get(Metadata.TITLE) != null)
|
|
||||||
{
|
|
||||||
result.append(metadata.get(Metadata.TITLE));
|
|
||||||
if (metadata.get(XMPDM.ALBUM) != null)
|
|
||||||
{
|
|
||||||
result
|
|
||||||
.append(" - ")
|
|
||||||
.append(metadata.get(XMPDM.ALBUM));
|
|
||||||
}
|
|
||||||
if (metadata.get(XMPDM.ARTIST) != null)
|
|
||||||
{
|
|
||||||
result
|
|
||||||
.append(" (")
|
|
||||||
.append(metadata.get(XMPDM.ARTIST))
|
|
||||||
.append(")");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result.toString();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@@ -5,8 +5,26 @@
|
|||||||
|
|
||||||
# Namespaces
|
# Namespaces
|
||||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||||
|
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||||
|
|
||||||
# Mappings
|
# Core mappings
|
||||||
songTitle=cm:title
|
author=cm:author
|
||||||
artist=cm:author
|
title=cm:title
|
||||||
description=cm:description
|
description=cm:description
|
||||||
|
created=cm:created
|
||||||
|
|
||||||
|
# Audio descriptive mappings
|
||||||
|
xmpDM\:album=audio:album
|
||||||
|
xmpDM\:artist=audio:artist
|
||||||
|
xmpDM\:composer=audio:composer
|
||||||
|
xmpDM\:engineer=audio:engineer
|
||||||
|
xmpDM\:genre=audio:genre
|
||||||
|
xmpDM\:trackNumber=audio:trackNumber
|
||||||
|
xmpDM\:releaseDate=audio:releaseDate
|
||||||
|
#xmpDM:logComment
|
||||||
|
|
||||||
|
# Audio specific mappings
|
||||||
|
xmpDM\:audioSampleRate=audio:sampleRate
|
||||||
|
xmpDM\:audioSampleType=audio:sampleType
|
||||||
|
xmpDM\:audioChannelType=audio:channelType
|
||||||
|
xmpDM\:audioCompressor=audio:compressor
|
||||||
|
@@ -29,17 +29,15 @@ import org.alfresco.service.namespace.QName;
|
|||||||
/**
|
/**
|
||||||
* Test for the MP3 metadata extraction from id3 tags.
|
* Test for the MP3 metadata extraction from id3 tags.
|
||||||
*/
|
*/
|
||||||
public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest
|
public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
|
||||||
{
|
{
|
||||||
private MP3MetadataExtracter extracter;
|
private MP3MetadataExtracter extracter;
|
||||||
private static final String ARTIST = "Hauskaz";
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setUp() throws Exception
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
super.setUp();
|
super.setUp();
|
||||||
extracter = new MP3MetadataExtracter();
|
extracter = (MP3MetadataExtracter)ctx.getBean("extracter.MP3");
|
||||||
extracter.setDictionaryService(dictionaryService);
|
|
||||||
extracter.register();
|
extracter.register();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,6 +62,10 @@ public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
{
|
{
|
||||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_MP3);
|
testExtractFromMimetype(MimetypeMap.MIMETYPE_MP3);
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
public void testOggExtraction() throws Exception {}
|
||||||
|
@Override
|
||||||
|
public void testFlacExtraction() throws Exception {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We don't have quite the usual metadata. Tests the descriptions one.
|
* We don't have quite the usual metadata. Tests the descriptions one.
|
||||||
@@ -93,23 +95,6 @@ public class MP3MetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
* Tests for various MP3 specific bits of metadata
|
* Tests for various MP3 specific bits of metadata
|
||||||
*/
|
*/
|
||||||
public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) {
|
public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) {
|
||||||
// Pending ALF-6170 for proper music namespace
|
super.testFileSpecificMetadata(mimetype, properties);
|
||||||
// QName songTitle = QName.createQName("music","songTitle");
|
|
||||||
// assertEquals(
|
|
||||||
// "Property " + songTitle + " not found for mimetype " + mimetype,
|
|
||||||
// QUICK_TITLE,
|
|
||||||
// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(songTitle)));
|
|
||||||
//
|
|
||||||
// QName songArtist = QName.createQName("music","artist");
|
|
||||||
// assertEquals(
|
|
||||||
// "Property " + songArtist + " not found for mimetype " + mimetype,
|
|
||||||
// ARTIST,
|
|
||||||
// DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(songArtist)));
|
|
||||||
|
|
||||||
// Description is a composite - check the artist part
|
|
||||||
assertContains(
|
|
||||||
"Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype,
|
|
||||||
ARTIST,
|
|
||||||
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1,172 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||||
|
*
|
||||||
|
* This file is part of Alfresco
|
||||||
|
*
|
||||||
|
* Alfresco is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Alfresco is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
|
import org.apache.tika.config.TikaConfig;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.XMPDM;
|
||||||
|
import org.apache.tika.parser.CompositeParser;
|
||||||
|
import org.apache.tika.parser.Parser;
|
||||||
|
import org.gagravarr.tika.FlacParser;
|
||||||
|
import org.gagravarr.tika.VorbisParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Metadata Extractor which makes use of the Apache
|
||||||
|
* Tika Audio Parsers to extract metadata from your
|
||||||
|
* media files.
|
||||||
|
* For backwards compatibility reasons, this doesn't
|
||||||
|
* handle the MP3 format, which has its own dedicated
|
||||||
|
* extractor in {@link MP3MetadataExtracter}
|
||||||
|
|
||||||
|
* <pre>
|
||||||
|
* <b>author:</b> -- cm:author
|
||||||
|
* <b>title:</b> -- cm:title
|
||||||
|
* <b>description:</b> -- cm:description
|
||||||
|
* <b>created:</b> -- cm:created
|
||||||
|
* <b>xmpDM:album</b> -- audio:album
|
||||||
|
* <b>xmpDM:artist</b> -- audio:artist
|
||||||
|
* <b>xmpDM:composer</b> -- audio:composer
|
||||||
|
* <b>xmpDM:engineer</b> -- audio:engineer
|
||||||
|
* <b>xmpDM:genre</b> -- audio:genre
|
||||||
|
* <b>xmpDM:trackNumber</b> -- audio:trackNumber
|
||||||
|
* <b>xmpDM:releaseDate</b> -- audio:releaseDate
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Nick Burch
|
||||||
|
*/
|
||||||
|
public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
|
||||||
|
{
|
||||||
|
protected static final String KEY_LYRICS = "lyrics";
|
||||||
|
|
||||||
|
private static Parser[] parsers = new Parser[] {
|
||||||
|
new VorbisParser(),
|
||||||
|
new FlacParser()
|
||||||
|
};
|
||||||
|
public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
|
||||||
|
new String[] { MimetypeMap.MIMETYPE_VORBIS, MimetypeMap.MIMETYPE_FLAC },
|
||||||
|
parsers
|
||||||
|
);
|
||||||
|
|
||||||
|
protected TikaConfig tikaConfig;
|
||||||
|
public void setTikaConfig(TikaConfig tikaConfig)
|
||||||
|
{
|
||||||
|
this.tikaConfig = tikaConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public TikaAudioMetadataExtracter()
|
||||||
|
{
|
||||||
|
this(SUPPORTED_MIMETYPES);
|
||||||
|
}
|
||||||
|
public TikaAudioMetadataExtracter(ArrayList<String> supportedMimeTypes)
|
||||||
|
{
|
||||||
|
super(supportedMimeTypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Parser getParser() {
|
||||||
|
return new CompositeParser(
|
||||||
|
tikaConfig.getMediaTypeRegistry(), parsers
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Map<String, Serializable> extractSpecific(Metadata metadata,
|
||||||
|
Map<String, Serializable> properties, Map<String,String> headers) {
|
||||||
|
// Most things can go with the default Tika -> Alfresco Mapping
|
||||||
|
// Handle the few special cases here
|
||||||
|
|
||||||
|
// The description is special
|
||||||
|
putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties);
|
||||||
|
|
||||||
|
// The release date can be fiddly
|
||||||
|
Date releaseDate = generateReleaseDate(metadata);
|
||||||
|
putRawValue(KEY_CREATED, releaseDate, properties);
|
||||||
|
putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties);
|
||||||
|
|
||||||
|
// TODO Get the Lyrics from the content
|
||||||
|
//putRawValue(KEY_LYRICS, getLyrics(), properties);
|
||||||
|
|
||||||
|
// All done
|
||||||
|
return properties;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
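* Generates the release date: a missing or empty value gives null, a bare four digit year becomes midnight on the 1st of January of that year, and anything else is handed to makeDate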
|
||||||
|
*/
|
||||||
|
private Date generateReleaseDate(Metadata metadata)
|
||||||
|
{
|
||||||
|
String date = metadata.get(XMPDM.RELEASE_DATE);
|
||||||
|
if(date == null || date.length() == 0)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is it just a year?
|
||||||
|
if(date.matches("\\d\\d\\d\\d"))
|
||||||
|
{
|
||||||
|
// Just a year, we need a full date
|
||||||
|
// Go for midnight on the 1st of January of that year
|
||||||
|
Calendar c = Calendar.getInstance();
|
||||||
|
c.set(
|
||||||
|
Integer.parseInt(date), Calendar.JANUARY, 1,
|
||||||
|
0, 0, 0
|
||||||
|
);
|
||||||
|
c.set(Calendar.MILLISECOND, 0);
|
||||||
|
return c.getTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Treat as a normal date
|
||||||
|
return makeDate(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
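* Generates the description: the title, followed by " - album" and " (artist)" where those are present
|
||||||
|
*
|
||||||
|
* @param metadata the Tika metadata extracted from the file
|
||||||
|
* @return the description, or an empty string if the file has no title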
|
||||||
|
*/
|
||||||
|
private String generateDescription(Metadata metadata)
|
||||||
|
{
|
||||||
|
StringBuilder result = new StringBuilder();
|
||||||
|
if (metadata.get(Metadata.TITLE) != null)
|
||||||
|
{
|
||||||
|
result.append(metadata.get(Metadata.TITLE));
|
||||||
|
if (metadata.get(XMPDM.ALBUM) != null)
|
||||||
|
{
|
||||||
|
result
|
||||||
|
.append(" - ")
|
||||||
|
.append(metadata.get(XMPDM.ALBUM));
|
||||||
|
}
|
||||||
|
if (metadata.get(XMPDM.ARTIST) != null)
|
||||||
|
{
|
||||||
|
result
|
||||||
|
.append(" (")
|
||||||
|
.append(metadata.get(XMPDM.ARTIST))
|
||||||
|
.append(")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.toString();
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,34 @@
|
|||||||
|
#
|
||||||
|
# TikaAudioMetadataExtracter - audio mapping
|
||||||
|
#
|
||||||
|
# This is used to map from the Tika audio metadata onto your
|
||||||
|
# content model. This will be used for any Audio content
|
||||||
|
# for which an explicit extractor isn't defined
|
||||||
|
#
|
||||||
|
# author: Nick Burch
|
||||||
|
|
||||||
|
# Namespaces
|
||||||
|
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||||
|
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||||
|
|
||||||
|
# Core mappings
|
||||||
|
author=cm:author
|
||||||
|
title=cm:title
|
||||||
|
description=cm:description
|
||||||
|
created=cm:created
|
||||||
|
|
||||||
|
# Audio descriptive mappings
|
||||||
|
xmpDM\:album=audio:album
|
||||||
|
xmpDM\:artist=audio:artist
|
||||||
|
xmpDM\:composer=audio:composer
|
||||||
|
xmpDM\:engineer=audio:engineer
|
||||||
|
xmpDM\:genre=audio:genre
|
||||||
|
xmpDM\:trackNumber=audio:trackNumber
|
||||||
|
xmpDM\:releaseDate=audio:releaseDate
|
||||||
|
#xmpDM:logComment
|
||||||
|
|
||||||
|
# Audio specific mappings
|
||||||
|
xmpDM\:audioSampleRate=audio:sampleRate
|
||||||
|
xmpDM\:audioSampleType=audio:sampleType
|
||||||
|
xmpDM\:audioChannelType=audio:channelType
|
||||||
|
xmpDM\:audioCompressor=audio:compressor
|
@@ -0,0 +1,139 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2005 Jesper Steen Møller
|
||||||
|
*
|
||||||
|
* This file is part of Alfresco
|
||||||
|
*
|
||||||
|
* Alfresco is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Alfresco is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package org.alfresco.repo.content.metadata;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.alfresco.model.ContentModel;
|
||||||
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
|
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||||
|
import org.alfresco.service.namespace.NamespaceService;
|
||||||
|
import org.alfresco.service.namespace.QName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for the audio metadata extraction.
|
||||||
|
*/
|
||||||
|
public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||||
|
{
|
||||||
|
private TikaAudioMetadataExtracter extracter;
|
||||||
|
private static final String ARTIST = "Hauskaz";
|
||||||
|
private static final String ALBUM = "About a dog and a fox";
|
||||||
|
private static final String GENRE = "Foxtrot";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception
|
||||||
|
{
|
||||||
|
super.setUp();
|
||||||
|
extracter = (TikaAudioMetadataExtracter)ctx.getBean("extracter.Audio");
|
||||||
|
extracter.register();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns the same extracter regardless - it is allowed
|
||||||
|
*/
|
||||||
|
protected MetadataExtracter getExtracter()
|
||||||
|
{
|
||||||
|
return extracter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSupports() throws Exception
|
||||||
|
{
|
||||||
|
for (String mimetype : TikaAudioMetadataExtracter.SUPPORTED_MIMETYPES)
|
||||||
|
{
|
||||||
|
boolean supports = extracter.isSupported(mimetype);
|
||||||
|
assertTrue("Mimetype should be supported: " + mimetype, supports);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testOggExtraction() throws Exception
|
||||||
|
{
|
||||||
|
testExtractFromMimetype(MimetypeMap.MIMETYPE_VORBIS);
|
||||||
|
}
|
||||||
|
public void testFlacExtraction() throws Exception
|
||||||
|
{
|
||||||
|
testExtractFromMimetype(MimetypeMap.MIMETYPE_FLAC);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We don't have quite the usual metadata, so this checks the title, the author and the composite description.
|
||||||
|
* Other tests in {@link #testFileSpecificMetadata(String, Map)}
|
||||||
|
*/
|
||||||
|
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) {
|
||||||
|
// Title is as normal
|
||||||
|
assertEquals(
|
||||||
|
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
|
||||||
|
QUICK_TITLE,
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE)));
|
||||||
|
// Has Author, not Creator, and is different
|
||||||
|
assertEquals(
|
||||||
|
"Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,
|
||||||
|
"Hauskaz",
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR)));
|
||||||
|
|
||||||
|
// Description is a composite
|
||||||
|
assertContains(
|
||||||
|
"Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + QUICK_TITLE + " for mimetype " + mimetype,
|
||||||
|
QUICK_TITLE,
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION)));
|
||||||
|
// Check rest of it later
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for various Audio specific bits of metadata
|
||||||
|
*/
|
||||||
|
public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) {
|
||||||
|
QName album = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "album");
|
||||||
|
assertEquals(
|
||||||
|
"Property " + album + " not found for mimetype " + mimetype,
|
||||||
|
ALBUM,
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(album)));
|
||||||
|
|
||||||
|
QName artist = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "artist");
|
||||||
|
assertEquals(
|
||||||
|
"Property " + artist + " not found for mimetype " + mimetype,
|
||||||
|
ARTIST,
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(artist)));
|
||||||
|
|
||||||
|
QName genre = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "genre");
|
||||||
|
assertEquals(
|
||||||
|
"Property " + genre + " not found for mimetype " + mimetype,
|
||||||
|
GENRE,
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(genre)));
|
||||||
|
|
||||||
|
QName releaseDate = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "releaseDate");
|
||||||
|
assertEquals(
|
||||||
|
"Property " + releaseDate + " not found for mimetype " + mimetype,
|
||||||
|
"2009-01-01T00:00:00.000Z",
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(releaseDate)));
|
||||||
|
|
||||||
|
QName channels = QName.createQName(NamespaceService.AUDIO_MODEL_1_0_URI, "channelType");
|
||||||
|
assertEquals(
|
||||||
|
"Property " + channels + " not found for mimetype " + mimetype,
|
||||||
|
"Stereo",
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(channels)));
|
||||||
|
|
||||||
|
|
||||||
|
// Description is a composite - check the artist part
|
||||||
|
assertContains(
|
||||||
|
"Property " + ContentModel.PROP_DESCRIPTION + " didn't contain " + ARTIST + " for mimetype " + mimetype,
|
||||||
|
ARTIST,
|
||||||
|
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_DESCRIPTION)));
|
||||||
|
}
|
||||||
|
}
|
@@ -11,6 +11,7 @@
|
|||||||
# Namespaces
|
# Namespaces
|
||||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||||
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
|
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
|
||||||
|
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
|
||||||
|
|
||||||
# Mappings
|
# Mappings
|
||||||
author=cm:author
|
author=cm:author
|
||||||
@@ -36,3 +37,16 @@ exif\:FNumber=exif:fNumber
|
|||||||
exif\:FocalLength=exif:focalLength
|
exif\:FocalLength=exif:focalLength
|
||||||
exif\:IsoSpeedRatings=exif:isoSpeedRatings
|
exif\:IsoSpeedRatings=exif:isoSpeedRatings
|
||||||
exif\:DateTimeOriginal=exif:dateTimeOriginal
|
exif\:DateTimeOriginal=exif:dateTimeOriginal
|
||||||
|
|
||||||
|
xmpDM\:album=audio:album
|
||||||
|
xmpDM\:artist=audio:artist
|
||||||
|
xmpDM\:composer=audio:composer
|
||||||
|
xmpDM\:engineer=audio:engineer
|
||||||
|
xmpDM\:genre=audio:genre
|
||||||
|
xmpDM\:trackNumber=audio:trackNumber
|
||||||
|
xmpDM\:releaseDate=audio:releaseDate
|
||||||
|
#xmpDM:logComment
|
||||||
|
xmpDM\:audioSampleRate=audio:sampleRate
|
||||||
|
xmpDM\:audioSampleType=audio:sampleType
|
||||||
|
xmpDM\:audioChannelType=audio:channelType
|
||||||
|
xmpDM\:audioCompressor=audio:compressor
|
||||||
|
@@ -124,7 +124,6 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
|||||||
//"2010.dwg", // Not auto-detected properly yet
|
//"2010.dwg", // Not auto-detected properly yet
|
||||||
".pdf",
|
".pdf",
|
||||||
".odt",
|
".odt",
|
||||||
".ogg"
|
|
||||||
};
|
};
|
||||||
|
|
||||||
for (String fileBase : testFiles)
|
for (String fileBase : testFiles)
|
||||||
|
@@ -88,18 +88,21 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
* Builds up a list of supported mime types by merging an explicit
|
* Builds up a list of supported mime types by merging an explicit
|
||||||
* list with any that Tika also claims to support
|
* list with any that Tika also claims to support
|
||||||
*/
|
*/
|
||||||
protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser tikaParser) {
|
protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers) {
|
||||||
ArrayList<String> types = new ArrayList<String>();
|
ArrayList<String> types = new ArrayList<String>();
|
||||||
for(String type : explicitTypes) {
|
for(String type : explicitTypes) {
|
||||||
if(!types.contains(type)) {
|
if(!types.contains(type)) {
|
||||||
types.add(type);
|
types.add(type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(tikaParser != null) {
|
if(tikaParsers != null) {
|
||||||
for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) {
|
for(Parser tikaParser : tikaParsers)
|
||||||
String type = mt.toString();
|
{
|
||||||
if(!types.contains(type)) {
|
for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) {
|
||||||
types.add(type);
|
String type = mt.toString();
|
||||||
|
if(!types.contains(type)) {
|
||||||
|
types.add(type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -225,9 +228,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
|||||||
{
|
{
|
||||||
is = getInputStream(reader);
|
is = getInputStream(reader);
|
||||||
Parser parser = getParser();
|
Parser parser = getParser();
|
||||||
Metadata metadata = new Metadata();
|
|
||||||
ParseContext context = new ParseContext();
|
ParseContext context = new ParseContext();
|
||||||
|
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
|
metadata.add(Metadata.CONTENT_TYPE, reader.getMimetype());
|
||||||
|
|
||||||
ContentHandler handler;
|
ContentHandler handler;
|
||||||
Map<String,String> headers = null;
|
Map<String,String> headers = null;
|
||||||
if(needHeaderContents()) {
|
if(needHeaderContents()) {
|
||||||
|
Reference in New Issue
Block a user