/* * Copyright (C) 2005-2007 Alfresco Software Limited. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * As a special exception to the terms and conditions of version 2.0 of * the GPL, you may redistribute this Program in connection with Free/Libre * and Open Source Software ("FLOSS") applications as described in Alfresco's * FLOSS exception. You should have recieved a copy of the text describing * the FLOSS exception, and it is also available here: * http://www.alfresco.com/legal/licensing" */ package org.alfresco.repo.content.metadata; import java.io.InputStream; import java.io.Serializable; import java.lang.reflect.Array; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.StringTokenizer; import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.PropertyDefinition; import org.alfresco.service.cmr.repository.ContentIOException; import 
org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.MimetypeService; import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.cmr.repository.datatype.TypeConversionException; import org.alfresco.service.namespace.InvalidQNameException; import org.alfresco.service.namespace.QName; import org.alfresco.util.ISO8601DateFormat; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * Support class for metadata extracters that support dynamic and config-driven * mapping between extracted values and model properties. Extraction is broken * up into two phases: *
* Migrating an existing extracter to use this class is straightforward: *
* @return Returns <code>1.0</code> if the mimetype is supported, otherwise <code>0.0</code>
*
* @see #isSupported(String)
*/
public double getReliability(String mimetype)
{
    // Reliability is all-or-nothing: it simply mirrors the isSupported(mimetype) check.
    if (isSupported(mimetype))
    {
        return 1.0D;
    }
    return 0.0D;
}
/**
* Set the policy to use when existing values are encountered. Depending on how the extracter
* is called, this may not be relevant, i.e. an empty map of existing properties may be passed
* in by the client code, which may follow its own overwrite strategy.
*
* @param overwritePolicy the policy to apply when there are existing system properties
*/
public void setOverwritePolicy(OverwritePolicy overwritePolicy)
{
// Plain assignment: the supplied policy is stored exactly as given (no null check or
// validation is performed here) and replaces any previously configured policy.
this.overwritePolicy = overwritePolicy;
}
/**
* Set the policy to use when existing values are encountered. Depending on how the extracter
* is called, this may not be relevant, i.e. an empty map of existing properties may be passed
* in by the client code, which may follow its own overwrite strategy.
*
* @param overwritePolicyStr the policy to apply when there are existing system properties
*/
public void setOverwritePolicy(String overwritePolicyStr)
{
    // Resolve the policy by name first via OverwritePolicy.valueOf; the field is only
    // updated once that lookup has returned a value.
    final OverwritePolicy resolvedPolicy = OverwritePolicy.valueOf(overwritePolicyStr);
    this.overwritePolicy = resolvedPolicy;
}
/**
* Set whether the extractor should discard metadata that fails to convert to the target type
* defined in the data dictionary model. This is true by default i.e. if the data
* extracted is not compatible with the target model then the extraction will fail. If this is
* false then any extracted data that fails to convert will be discarded.
*
* @param failOnTypeConversion false to discard properties that can't get converted
* to the dictionary-defined type, or true (default)
* to fail the extraction if the type doesn't convert
*/
public void setFailOnTypeConversion(boolean failOnTypeConversion)
{
// Only records the flag; nothing else happens in this setter. How the flag is applied
// during extraction is decided elsewhere in the class (not visible from this method).
this.failOnTypeConversion = failOnTypeConversion;
}
/**
* Set the date formats, over and above the {@link ISO8601DateFormat ISO8601 format}, that will
* be supported for string to date conversions. The supported syntax is described by the
* <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/text/SimpleDateFormat.html">SimpleDateFormat Javadocs</a>.
*
* @param supportedDateFormats a list of supported date formats.
*/
public void setSupportedDateFormats(List* # Namespaces prefixes * namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 * namespace.prefix.my=http://www....com/alfresco/1.0 * * # Mapping * editor=cm:author, my:editor * title=cm:title * user1=cm:summary * user2=cm:description ** The mapping can therefore be from a single document property onto several system properties. * * @param mappingProperties the properties that map document properties to system properties */ public void setMappingProperties(Properties mappingProperties) { mapping = readMappingProperties(mappingProperties); } /** * Helper method for derived classes to obtain the mappings that will be applied to raw * values. This should be called after initialization in order to guarantee the complete * map is given. *
* Normally, the list of properties that can be extracted from a document is fixed and
* well-known - in that case, just extract everything. But some implementations may have
* an extra, indeterminate set of values available for extraction. If the extraction of
* these runtime parameters is expensive, then the keys provided by the return value can
* be used to extract values from the documents. The metadata extraction becomes fully
* configuration-driven, i.e. declaring further mappings will result in more values being
* extracted from the documents.
*
* Most extractors will not be using this method. For an example of its use, see the
* {@linkplain OpenDocumentMetadataExtracter OpenDocument extractor}, which uses the mapping
* to select specific user properties from a document.
*/
protected final Map
* The default implementation looks for the default mapping file in the location
* given by the class name and .properties. If the extracter's class is
* x.y.z.MyExtracter then the default properties will be picked up at
* classpath:/x/y/z/MyExtracter.properties.
* Inner classes are supported, but the '$' in the class name is replaced with '-', so
* default properties for x.y.z.MyStuff$MyExtracter will be located using
* x.y.z.MyStuff-MyExtracter.properties.
*
* The default mapping implementation should include thorough Javadocs so that the
* system administrators can accurately determine how to best enhance or override the
* default mapping.
*
* If the default mapping is declared in a properties file other than the one named after
* the class, then the {@link #readMappingProperties(String)} method can be used to quickly
* generate the return value:
*
* Raw values must not be trimmed or removed for any reason. Null values and empty
* strings are
*
* Properties extracted and their meanings and types should be thoroughly described in
* the class-level javadocs of the extracter implementation, for example:
*
*
* String values are trimmed before being put into the map.
* Otherwise, it is up to the extracter to ensure that the value is a Serializable.
* It is not appropriate to implicitly convert values in order to make them Serializable
* - the best conversion method will depend on the value's specific meaning.
*
* @param key the destination key
* @param value the serializable value
* @param destination the map to put values into
* @return Returns true if set, otherwise false
*/
@SuppressWarnings("unchecked")
protected boolean putRawValue(String key, Serializable value, Map
* The map can also be created in code either statically or during the call.
*
* @return Returns the default, static mapping. It may not be null.
*
* @see #setInheritDefaultMapping(boolean inherit)
*/
protected Map
* protected Map<
*
*
* editor: - the document editor --> cm:author
* title: - the document title --> cm:title
* user1: - the document summary
* user2: - the document description --> cm:description
* user3: -
* user4: -
*
*
* @param reader the document to extract the values from. The stream provided by
* the reader must be closed if accessed directly.
* @return Returns a map of document property values keyed by property name.
* @throws All exception conditions can be handled.
*
* @see #getDefaultMapping()
*/
protected abstract Map