ML text index and search

Expose properties via DD
Remove more old indexer code

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4592 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2006-12-13 11:52:41 +00:00
parent 7f8c678bd5
commit 82cfe2d806
59 changed files with 1745 additions and 637 deletions

View File

@@ -344,9 +344,6 @@
<property name="nameSpaceService">
<ref bean="namespaceService" />
</property>
<property name="luceneIndexLock">
<ref bean="luceneIndexLock" />
</property>
<property name="luceneFullTextSearchIndexer">
<ref bean="LuceneFullTextSearchIndexer" />
</property>
@@ -368,15 +365,6 @@
<property name="indexerBatchSize">
<value>${lucene.indexer.batchSize}</value>
</property>
<property name="indexerMinMergeDocs">
<value>${lucene.indexer.minMergeDocs}</value>
</property>
<property name="indexerMergeFactor">
<value>${lucene.indexer.mergeFactor}</value>
</property>
<property name="indexerMaxMergeDocs">
<value>${lucene.indexer.maxMergeDocs}</value>
</property>
<property name="lockDirectory">
<value>${dir.indexes.lock}</value>
</property>
@@ -411,11 +399,6 @@
</property>
</bean>
<!-- Support for locking lucene indexes for deletion and update -->
<bean id="luceneIndexLock" class="org.alfresco.repo.search.transaction.LuceneIndexLock"></bean>
<!-- -->
<!-- Lock Service -->
<!-- -->

View File

@@ -16,9 +16,6 @@
<property name="nameSpaceService">
<ref bean="namespaceService" />
</property>
<property name="luceneIndexLock">
<ref bean="luceneIndexLock" />
</property>
<property name="luceneFullTextSearchIndexer">
<ref bean="LuceneFullTextSearchIndexer" />
</property>

View File

@@ -1,101 +0,0 @@
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>
<!-- Indexer and searchers for lucene -->
<bean id="luceneIndexerAndSearcherFactory"
class="org.alfresco.repo.search.impl.lucene.LuceneIndexerAndSearcherFactory">
<property name="nodeService">
<ref bean="nodeService" />
</property>
<property name="dictionaryService">
<ref bean="dictionaryService" />
</property>
<property name="nameSpaceService">
<ref bean="namespaceService" />
</property>
<property name="luceneIndexLock">
<ref bean="luceneIndexLock" />
</property>
<property name="luceneFullTextSearchIndexer">
<ref bean="LuceneFullTextSearchIndexer" />
</property>
<property name="indexRootLocation">
<value>${dir.indexes}</value>
</property>
<property name="contentService">
<ref bean="contentService" />
</property>
<property name="queryRegister">
<ref bean="queryRegisterComponent" />
</property>
<property name="maxAtomicTransformationTime">
<value>${lucene.maxAtomicTransformationTime}</value>
</property>
<property name="queryMaxClauses">
<value>${lucene.query.maxClauses}</value>
</property>
<property name="indexerBatchSize">
<value>${lucene.indexer.batchSize}</value>
</property>
<property name="indexerMinMergeDocs">
<value>${lucene.indexer.minMergeDocs}</value>
</property>
<property name="indexerMergeFactor">
<value>${lucene.indexer.mergeFactor}</value>
</property>
<property name="indexerMaxMergeDocs">
<value>${lucene.indexer.maxMergeDocs}</value>
</property>
<property name="lockDirectory">
<value>${dir.indexes.lock}</value>
</property>
<property name="indexerMaxFieldLength">
<value>${lucene.indexer.maxFieldLength}</value>
</property>
<property name="writeLockTimeout">
<value>${lucene.write.lock.timeout}</value>
</property>
<property name="commitLockTimeout">
<value>${lucene.commit.lock.timeout}</value>
</property>
<property name="lockPollInterval">
<value>${lucene.lock.poll.interval}</value>
</property>
</bean>
<!-- Bean to backup Lucene indexes -->
<bean id="luceneIndexBackupComponent"
class="org.alfresco.repo.search.impl.lucene.LuceneIndexerAndSearcherFactory$LuceneIndexBackupComponent">
<property name="transactionService">
<ref bean="transactionComponent" />
</property>
<property name="factory">
<ref bean="luceneIndexerAndSearcherFactory" />
</property>
<property name="nodeService">
<ref bean="nodeService" />
</property>
<property name="targetLocation">
<value>${dir.root}/backup-lucene-indexes</value>
</property>
</bean>
<bean id="indexBackupJobDetail" class="org.springframework.scheduling.quartz.JobDetailBean">
<property name="jobClass">
<value>org.alfresco.repo.search.impl.lucene.LuceneIndexerAndSearcherFactory$LuceneIndexBackupJob</value>
</property>
<property name="jobDataAsMap">
<map>
<entry key="luceneIndexBackupComponent">
<ref bean="luceneIndexBackupComponent" />
</entry>
</map>
</property>
</bean>
</beans>

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.cz.CzechAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.cz.CzechAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.DanishSnowballAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.DanishSnowballAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.de.GermanAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.de.GermanAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.el.GreekAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.el.GreekAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.SpanishSnowballAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.SpanishSnowballAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.fr.FrenchAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.fr.FrenchAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.ItalianSnowballAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.ItalianSnowballAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.cjk.CJKAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.cjk.CJKAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.cjk.CJKAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.cjk.CJKAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.nl.DutchAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.nl.DutchAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.NorwegianSnowballAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.NorwegianSnowballAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.PortugueseSnowballAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.PortugueseSnowballAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.br.BrazilianAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.br.BrazilianAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.ru.RussianAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.ru.RussianAnalyzer

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.alfresco.repo.search.impl.lucene.analysis.SwedishSnowballAnalyser
d_dictionary.datatype.d_content.analyzer=org.alfresco.repo.search.impl.lucene.analysis.SwedishSnowballAnalyser

View File

@@ -0,0 +1,4 @@
# Data Type Index Analyzers
d_dictionary.datatype.d_text.analyzer=org.apache.lucene.analysis.cn.ChineseAnalyzer
d_dictionary.datatype.d_content.analyzer=org.apache.lucene.analysis.cn.ChineseAnalyzer

View File

@@ -316,6 +316,15 @@ import org.apache.commons.logging.LogFactory;
{
return aspects.values();
}
/**
*
* @return the compiled properties
*/
public Collection<PropertyDefinition> getProperties()
{
return properties.values();
}
/* (non-Javadoc)

View File

@@ -18,6 +18,8 @@ package org.alfresco.repo.dictionary;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
@@ -285,5 +287,50 @@ public class DictionaryComponent implements DictionaryService
{
return dictionaryDAO.getAssociation(associationName);
}
/*
* (non-Javadoc)
* @see org.alfresco.service.cmr.dictionary.DictionaryService#getAllProperties(org.alfresco.service.namespace.QName)
*/
public Collection<QName> getAllProperties(QName dataType)
{
Collection<QName> aspects = new HashSet<QName>(64);
for (QName model : getAllModels())
{
aspects.addAll(getProperties(model, dataType));
}
return aspects;
}
/*
* (non-Javadoc)
* @see org.alfresco.service.cmr.dictionary.DictionaryService#getAllProperties(org.alfresco.service.namespace.QName, org.alfresco.service.namespace.QName)
*/
public Collection<QName> getProperties(QName model, QName dataType)
{
Collection<PropertyDefinition> propDefs = dictionaryDAO.getProperties(model, dataType);
HashSet<QName> props = new HashSet<QName>(propDefs.size());
for(PropertyDefinition def : propDefs)
{
props.add(def.getName());
}
return props;
}
public Collection<QName> getProperties(QName model)
{
Collection<PropertyDefinition> propDefs = dictionaryDAO.getProperties(model);
HashSet<QName> props = new HashSet<QName>(propDefs.size());
for(PropertyDefinition def : propDefs)
{
props.add(def.getName());
}
return props;
}
}

View File

@@ -21,6 +21,7 @@ import java.util.Collection;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
import org.alfresco.service.cmr.dictionary.ModelDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.namespace.QName;
@@ -61,6 +62,14 @@ public interface DictionaryDAO extends ModelQuery
* @return the aspects of the model
*/
public Collection<AspectDefinition> getAspects(QName model);
/**
*
* @param model the model for which to get properties
* @return
*/
public Collection<PropertyDefinition> getProperties(QName model);
/**
* Construct an anonymous type that combines a primary type definition and
@@ -87,4 +96,14 @@ public interface DictionaryDAO extends ModelQuery
*/
public void removeModel(QName model);
/**
* Get all properties for the model and that are of the given data type.
* If dataType is null then the all properties will be returned.
*
* @param modelName
* @param dataType
* @return
*/
public Collection<PropertyDefinition> getProperties(QName modelName, QName dataType);
}

View File

@@ -20,6 +20,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -441,5 +442,33 @@ public class DictionaryDAOImpl implements DictionaryDAO
return new M2AnonymousTypeDefinition(typeDef, aspectDefs);
}
/*
* (non-Javadoc)
* @see org.alfresco.repo.dictionary.DictionaryDAO#getProperties(org.alfresco.service.namespace.QName)
*/
public Collection<PropertyDefinition> getProperties(QName modelName)
{
CompiledModel model = getCompiledModel(modelName);
return model.getProperties();
}
/*
* (non-Javadoc)
* @see org.alfresco.repo.dictionary.DictionaryDAO#getProperties(org.alfresco.service.namespace.QName, org.alfresco.service.namespace.QName)
*/
public Collection<PropertyDefinition> getProperties(QName modelName, QName dataType)
{
HashSet<PropertyDefinition> properties = new HashSet<PropertyDefinition>();
Collection<PropertyDefinition> props = getProperties(modelName);
for(PropertyDefinition prop : props)
{
if((dataType == null) || prop.getDataType().getName().equals(dataType))
{
properties.add(prop);
}
}
return properties;
}
}

View File

@@ -0,0 +1,52 @@
package org.alfresco.repo.search;
import org.alfresco.error.AlfrescoRuntimeException;
/**
 * Specifies how multi-lingual (ML) properties are analysed for indexing and search.
 *
 * @author andyh
 */
public enum MLAnalysisMode
{
    /**
     * Use only the exact locale.
     */
    LOCALE_ONLY,

    /**
     * Use the exact locale plus the "no locale" form (all languages).
     */
    LOCALE_AND_ALL,

    /**
     * Expand the locale to include every locale that contains it:
     * en_GB expands to en_GB and en, but not to all languages.
     */
    LOCALE_AND_ALL_CONTAINING_LOCALES,

    /**
     * Expand the locale to include every locale that contains it, plus all languages:
     * en_GB expands to en_GB, en, and all.
     */
    LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL,

    /**
     * Expand to all the locales contained by this one:
     * en expands to en, en_GB, en_US, ...
     */
    LOCAL_AND_ALL_CONTAINED_LOCALES;

    /**
     * Look up a mode by its name, ignoring case.
     *
     * @param mode the mode name, e.g. "LOCALE_ONLY"
     * @return the matching analysis mode
     * @throws AlfrescoRuntimeException if no mode matches the given name
     */
    public static MLAnalysisMode getMLAnalysisMode(String mode)
    {
        for (MLAnalysisMode candidate : values())
        {
            // name() is equivalent to toString() here (toString is not overridden).
            if (candidate.name().equalsIgnoreCase(mode))
            {
                return candidate;
            }
        }
        throw new AlfrescoRuntimeException("Unknown ML Analysis mode " + mode);
    }
}

View File

@@ -1,34 +0,0 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.search.impl.lucene;
import org.alfresco.repo.search.transaction.LuceneIndexLock;
/**
 * Contract for objects that coordinate access to a Lucene index via a
 * {@link LuceneIndexLock}, exposing read/write lock acquisition and release.
 */
public interface Lockable
{
/**
 * Set the lock object used to guard index access.
 *
 * @param luceneIndexLock the shared index lock
 */
public void setLuceneIndexLock(LuceneIndexLock luceneIndexLock);
/**
 * @return the shared index lock currently in use
 */
public LuceneIndexLock getLuceneIndexLock();
/**
 * Acquire the read lock. Callers must pair this with {@link #releaseReadLock()}.
 */
public void getReadLock();
/**
 * Release a previously acquired read lock.
 */
public void releaseReadLock();
/**
 * Acquire the write lock. Callers must pair this with {@link #releaseWriteLock()}.
 */
public void getWriteLock();
/**
 * Release a previously acquired write lock.
 */
public void releaseWriteLock();
}

View File

@@ -20,6 +20,8 @@ import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser;
import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser;
import org.alfresco.service.cmr.dictionary.DictionaryService;
@@ -32,44 +34,43 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
/**
* Analyse properties according to the property definition.
*
* The default is to use the standard tokeniser. The tokeniser should not have
* been called when indexing properties that require no tokenisation. (tokenise
* should be set to false when adding the field to the document)
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that
* require no tokenisation. (tokenise should be set to false when adding the field to the document)
*
* @author andyh
*
*/
public class LuceneAnalyser extends Analyzer
{
// Dictionary service to look up analyser classes by data type and locale.
private DictionaryService dictionaryService;
// If all else fails a fall back analyser
private Analyzer defaultAnalyser;
// Cached analysers for non ML data types.
private Map<String, Analyzer> analysers = new HashMap<String, Analyzer>();
private MLAnalysisMode mlAlaysisMode;
/**
* Constructs with a default standard analyser
*
* @param defaultAnalyzer
* Any fields not specifically defined to use a different
* analyzer will use the one provided here.
* Any fields not specifically defined to use a different analyzer will use the one provided here.
*/
public LuceneAnalyser(DictionaryService dictionaryService)
public LuceneAnalyser(DictionaryService dictionaryService, MLAnalysisMode mlAlaysisMode)
{
this(new AlfrescoStandardAnalyser());
this.dictionaryService = dictionaryService;
this.mlAlaysisMode = mlAlaysisMode;
}
/**
* Constructs with default analyzer.
*
* @param defaultAnalyzer
* Any fields not specifically defined to use a different
* analyzer will use the one provided here.
* Any fields not specifically defined to use a different analyzer will use the one provided here.
*/
public LuceneAnalyser(Analyzer defaultAnalyser)
{
@@ -78,6 +79,23 @@ public class LuceneAnalyser extends Analyzer
public TokenStream tokenStream(String fieldName, Reader reader)
{
// Treat multilingual as a special case.
// If multilingual then we need to find the correct tokeniser.
// This is done dynamically by reading a language code at the start of the reader.
if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef != null)
{
if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))
{
MLAnalayser analyser = new MLAnalayser(dictionaryService);
return analyser.tokenStream(fieldName, reader);
}
}
}
Analyzer analyser = (Analyzer) analysers.get(fieldName);
if (analyser == null)
{
@@ -86,6 +104,12 @@ public class LuceneAnalyser extends Analyzer
return analyser.tokenStream(fieldName, reader);
}
/**
* Pick the analyser from the field name
*
* @param fieldName
* @return
*/
private Analyzer findAnalyser(String fieldName)
{
Analyzer analyser;
@@ -116,24 +140,31 @@ public class LuceneAnalyser extends Analyzer
}
else if (fieldName.startsWith("@"))
{
QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef != null)
if (fieldName.endsWith(".mimetype"))
{
if (propertyDef.isTokenisedInIndex())
{
DataTypeDefinition dataType = propertyDef.getDataType();
analyser = loadAnalyzer(dataType);
}
else
{
analyser = new VerbatimAnalyser();
}
analyser = new VerbatimAnalyser();
}
else
{
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
analyser = loadAnalyzer(dataType);
QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef != null)
{
if (propertyDef.isTokenisedInIndex())
{
DataTypeDefinition dataType = propertyDef.getDataType();
analyser = loadAnalyzer(dataType);
}
else
{
analyser = new VerbatimAnalyser();
}
}
else
{
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
analyser = loadAnalyzer(dataType);
}
}
}
else
@@ -144,6 +175,12 @@ public class LuceneAnalyser extends Analyzer
return analyser;
}
/**
* Find and instantiate an analyser. These should all be thread safe as Analyser.tokenStream should be re-entrant.
*
* @param dataType
* @return
*/
private Analyzer loadAnalyzer(DataTypeDefinition dataType)
{
String analyserClassName = dataType.getAnalyserClassName();
@@ -155,19 +192,40 @@ public class LuceneAnalyser extends Analyzer
}
catch (ClassNotFoundException e)
{
throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using "
+ analyserClassName);
throw new RuntimeException("Unable to load analyser for property of type "
+ dataType.getName() + " using " + analyserClassName);
}
catch (InstantiationException e)
{
throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using "
+ analyserClassName);
throw new RuntimeException("Unable to load analyser for property of type "
+ dataType.getName() + " using " + analyserClassName);
}
catch (IllegalAccessException e)
{
throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using "
+ analyserClassName);
throw new RuntimeException("Unable to load analyser for property of type "
+ dataType.getName() + " using " + analyserClassName);
}
}
/**
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc.
*/
@Override
public int getPositionIncrementGap(String fieldName)
{
if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef != null)
{
if (propertyDef.getDataType().equals(DataTypeDefinition.MLTEXT))
{
return 1000;
}
}
}
return super.getPositionIncrementGap(fieldName);
}
}

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.alfresco.repo.search.IndexerException;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo;
import org.alfresco.repo.search.impl.lucene.index.TransactionStatus;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo.LockWork;
@@ -86,7 +87,7 @@ public abstract class LuceneBase2
* @param deltaId
* @throws IOException
*/
protected void initialise(StoreRef store, String deltaId, boolean createMain, boolean createDelta)
protected void initialise(StoreRef store, String deltaId)
throws LuceneIndexException
{
this.store = store;
@@ -208,7 +209,7 @@ public abstract class LuceneBase2
*/
protected IndexWriter getDeltaWriter() throws LuceneIndexException, IOException
{
return indexInfo.getDeltaIndexWriter(deltaId, new LuceneAnalyser(dictionaryService));
return indexInfo.getDeltaIndexWriter(deltaId, new LuceneAnalyser(dictionaryService, config.getDefaultMLIndexAnalysisMode()));
}
/**

View File

@@ -32,10 +32,9 @@ import org.alfresco.repo.dictionary.M2Aspect;
import org.alfresco.repo.dictionary.M2Model;
import org.alfresco.repo.dictionary.M2Property;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
import org.alfresco.repo.search.transaction.LuceneIndexLock;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.ChildAssociationRef;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
@@ -59,7 +58,6 @@ public class LuceneCategoryTest2 extends TestCase
static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
NodeService nodeService;
DictionaryService dictionaryService;
LuceneIndexLock luceneIndexLock;
private NodeRef rootNodeRef;
private NodeRef n1;
private NodeRef n2;
@@ -111,7 +109,6 @@ public class LuceneCategoryTest2 extends TestCase
public void setUp() throws Exception
{
nodeService = (NodeService)ctx.getBean("dbNodeService");
luceneIndexLock = (LuceneIndexLock)ctx.getBean("luceneIndexLock");
dictionaryService = (DictionaryService)ctx.getBean("dictionaryService");
luceneFTS = (FullTextSearchIndexer) ctx.getBean("LuceneFullTextSearchIndexer");
dictionaryDAO = (DictionaryDAO) ctx.getBean("dictionaryDAO");

View File

@@ -16,21 +16,48 @@
*/
package org.alfresco.repo.search.impl.lucene;
import org.alfresco.repo.search.MLAnalysisMode;
/**
 * Configuration contract for the Lucene index/search subsystem: index location,
 * batching and merge tuning, query limits, and default multi-lingual analysis modes.
 */
public interface LuceneConfig
{
/**
* Set the lock dir - just to make sure - this should no longer be used.
*
* @param lockDirectory
*/
public void setLockDirectory(String lockDirectory);
/**
* The path to the index location
* @return
*/
public String getIndexRootLocation();
/**
* The batch size in which to group flushes of the index.
*
* @return
*/
public int getIndexerBatchSize();
/**
 * The Lucene maxMergeDocs setting: the largest segment (in documents) that may be merged.
 *
 * @return the maximum number of documents per merged segment
 */
public int getIndexerMaxMergeDocs();
/**
 * The Lucene mergeFactor setting: how many segments accumulate before a merge.
 *
 * @return the merge factor
 */
public int getIndexerMergeFactor();
/**
 * The Lucene minMergeDocs setting: documents buffered in memory before a segment is written.
 *
 * @return the minimum number of documents to buffer before merging
 */
public int getIndexerMinMergeDocs();
/**
 * The directory in which index lock files are kept.
 *
 * @return the lock directory path
 */
public String getLockDirectory();
/**
* The maximum number of sub-queries that can be generated out of wild card expansion etc
* @return
*/
public int getQueryMaxClauses();
/**
* The default mode for analysing ML text during index.
*
* @return
*/
public MLAnalysisMode getDefaultMLIndexAnalysisMode();
/**
* The default mode for analysis of ML text during search.
*
* @return
*/
public MLAnalysisMode getDefaultMLSearchAnalysisMode();
}

View File

@@ -1,46 +0,0 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.search.impl.lucene;
import java.util.Set;
import org.alfresco.repo.search.Indexer;
import org.alfresco.repo.search.IndexerSPI;
import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
/**
* @author Andy Hind
*/
/**
 * A Lucene-backed indexer supporting two-phase commit (prepare/commit/rollback)
 * and full-text search integration.
 *
 * @author Andy Hind
 */
public interface LuceneIndexer extends IndexerSPI, Lockable
{
/**
 * Commit the prepared changes to the index.
 */
public void commit();
/**
 * Discard any uncommitted changes.
 */
public void rollback();
/**
 * First phase of two-phase commit: prepare the pending changes.
 *
 * @return the transaction vote
 */
public int prepare();
/**
 * @return true if this indexer holds uncommitted modifications
 */
public boolean isModified();
/**
 * Set the node service used to read node data during indexing.
 */
public void setNodeService(NodeService nodeService);
/**
 * Set the dictionary service used to resolve property/type definitions.
 */
public void setDictionaryService(DictionaryService dictionaryService);
/**
 * Set the background full-text-search indexer to notify of pending FTS work.
 */
public void setLuceneFullTextSearchIndexer(FullTextSearchIndexer luceneFullTextSearchIndexer);
/**
 * @return the identifier of this indexer's delta (in-flight) index
 */
public String getDeltaId();
/**
 * Flush pending changes to the delta index.
 *
 * @throws LuceneIndexException on index write failure
 */
public void flushPending() throws LuceneIndexException;
/**
 * @return the set of node references deleted in this transaction
 */
public Set<NodeRef> getDeletions();
}

View File

@@ -30,11 +30,11 @@ import javax.transaction.xa.Xid;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.search.IndexerException;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.QueryRegisterComponent;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo;
import org.alfresco.repo.search.transaction.LuceneIndexLock;
import org.alfresco.repo.search.transaction.SimpleTransaction;
import org.alfresco.repo.search.transaction.SimpleTransactionManager;
import org.alfresco.repo.transaction.AlfrescoTransactionSupport;
@@ -50,7 +50,6 @@ import org.alfresco.util.GUID;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.store.Lock;
import org.quartz.Job;
@@ -59,14 +58,11 @@ import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
/**
* This class is resource manager LuceneIndexers and LuceneSearchers.
*
* It supports two phase commit inside XA transactions and outside transactions it provides thread local transaction support.
*
* TODO: Provide pluggable support for a transaction manager TODO: Integrate with Spring transactions
* This class is resource manager LuceneIndexers and LuceneSearchers. It supports two phase commit inside XA
* transactions and outside transactions it provides thread local transaction support. TODO: Provide pluggable support
* for a transaction manager TODO: Integrate with Spring transactions
*
* @author andyh
*
*/
public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearcher, XAResource
@@ -81,16 +77,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private int indexerBatchSize;
private int indexerMinMergeDocs;
private int indexerMergeFactor;
private int indexerMaxMergeDocs;
private String lockDirectory;
/**
* A map of active global transactions . It contains all the indexers a transaction has used, with at most one indexer for each store within a transaction
* A map of active global transactions . It contains all the indexers a transaction has used, with at most one
* indexer for each store within a transaction
*/
private static Map<Xid, Map<StoreRef, LuceneIndexer2>> activeIndexersInGlobalTx = new HashMap<Xid, Map<StoreRef, LuceneIndexer2>>();
@@ -123,8 +112,6 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private NodeService nodeService;
private LuceneIndexLock luceneIndexLock;
private FullTextSearchIndexer luceneFullTextSearchIndexer;
private String indexRootLocation;
@@ -142,6 +129,12 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private long commitLockTimeout;
private String lockDirectory;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_ONLY;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
/**
* Private constructor for the singleton TODO: FIt in with IOC
*/
@@ -172,11 +165,6 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
this.nameSpaceService = nameSpaceService;
}
public void setLuceneIndexLock(LuceneIndexLock luceneIndexLock)
{
this.luceneIndexLock = luceneIndexLock;
}
public void setLuceneFullTextSearchIndexer(FullTextSearchIndexer luceneFullTextSearchIndexer)
{
this.luceneFullTextSearchIndexer = luceneFullTextSearchIndexer;
@@ -193,7 +181,8 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
}
/**
* Set the maximum average transformation time allowed to a transformer in order to have the transformation performed in the current transaction. The default is 20ms.
* Set the maximum average transformation time allowed to a transformer in order to have the transformation
* performed in the current transaction. The default is 20ms.
*
* @param maxAtomicTransformationTime
* the maximum average time that a text transformation may take in order to be performed atomically.
@@ -796,36 +785,6 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
this.indexerBatchSize = indexerBatchSize;
}
public int getIndexerMaxMergeDocs()
{
return indexerMaxMergeDocs;
}
public void setIndexerMaxMergeDocs(int indexerMaxMergeDocs)
{
this.indexerMaxMergeDocs = indexerMaxMergeDocs;
}
public int getIndexerMergeFactor()
{
return indexerMergeFactor;
}
public void setIndexerMergeFactor(int indexerMergeFactor)
{
this.indexerMergeFactor = indexerMergeFactor;
}
public int getIndexerMinMergeDocs()
{
return indexerMinMergeDocs;
}
public void setIndexerMinMergeDocs(int indexerMinMergeDocs)
{
this.indexerMinMergeDocs = indexerMinMergeDocs;
}
public String getLockDirectory()
{
return lockDirectory;
@@ -882,7 +841,7 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
{
this.commitLockTimeout = timeout;
}
public long getCommitLockTimeout()
{
return commitLockTimeout;
@@ -912,7 +871,8 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
/**
* This component is able to <i>safely</i> perform backups of the Lucene indexes while the server is running.
* <p>
* It can be run directly by calling the {@link #backup() } method, but the convenience {@link LuceneIndexBackupJob} can be used to call it as well.
* It can be run directly by calling the {@link #backup() } method, but the convenience {@link LuceneIndexBackupJob}
* can be used to call it as well.
*
* @author Derek Hulley
*/
@@ -1204,4 +1164,27 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
});
}
}
/**
 * @return the ML analysis mode applied when indexing multilingual (d:mltext) properties
 */
public MLAnalysisMode getDefaultMLIndexAnalysisMode()
{
return defaultMLIndexAnalysisMode;
}
/**
 * Set the default ML analysis mode used at index time.
 *
 * @param mode the mode name, resolved via MLAnalysisMode.getMLAnalysisMode
 *             (typically injected from Spring configuration as a String)
 */
public void setDefaultMLIndexAnalysisMode(String mode)
{
defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
}
/**
 * @return the ML analysis mode applied at search time when none is given on the SearchParameters
 */
public MLAnalysisMode getDefaultMLSearchAnalysisMode()
{
return defaultMLSearchAnalysisMode;
}
/**
 * Set the default ML analysis mode used at search time.
 *
 * @param mode the mode name, resolved via MLAnalysisMode.getMLAnalysisMode
 */
public void setDefaultMLSearchAnalysisMode(String mode)
{
defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
}
}

View File

@@ -30,6 +30,7 @@ import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@@ -55,6 +56,7 @@ import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.InvalidNodeRefException;
import org.alfresco.service.cmr.repository.MLText;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.Path;
@@ -475,7 +477,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
}
LuceneIndexerImpl2 indexer = new LuceneIndexerImpl2();
indexer.setLuceneConfig(config);
indexer.initialise(storeRef, deltaId, false, true);
indexer.initialise(storeRef, deltaId);
return indexer;
}
@@ -1443,6 +1445,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
boolean tokenise = true;
boolean atomic = true;
boolean isContent = false;
boolean isMultiLingual = false;
PropertyDefinition propertyDef = getDictionaryService().getProperty(propertyName);
if (propertyDef != null)
@@ -1452,6 +1455,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
tokenise = propertyDef.isTokenisedInIndex();
atomic = propertyDef.isIndexedAtomically();
isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT);
isMultiLingual = propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT);
}
if (value == null)
{
@@ -1621,7 +1625,20 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
fieldIndex = Field.Index.NO;
}
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
if(isMultiLingual)
{
MLText mlText = DefaultTypeConverter.INSTANCE.convert(MLText.class, value);
for(Locale locale : mlText.getLocales())
{
String localeString = mlText.getValue(locale);
doc.add(new Field(attributeName, "\u0000" + locale.toString() +"\u0000" + localeString, fieldStore, fieldIndex, Field.TermVector.NO));
}
}
else
{
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
}
}
}

View File

@@ -18,16 +18,18 @@ package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.List;
import java.util.Locale;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.ModelDefinition;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.namespace.NamespacePrefixResolver;
@@ -53,6 +55,8 @@ public class LuceneQueryParser extends QueryParser
private DictionaryService dictionaryService;
private List<Locale> locales;
/**
* Parses a query string, returning a {@link org.apache.lucene.search.Query}.
*
@@ -66,8 +70,8 @@ public class LuceneQueryParser extends QueryParser
* if the parsing fails
*/
static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, Operator defaultOperator)
throws ParseException
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
Operator defaultOperator, List<Locale> locales) throws ParseException
{
if (s_logger.isDebugEnabled())
{
@@ -77,9 +81,16 @@ public class LuceneQueryParser extends QueryParser
parser.setDefaultOperator(defaultOperator);
parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService);
parser.setLocales(locales);
// TODO: Apply locale constraints at the top level if required for the non-ML doc types.
return parser.parse(query);
}
/**
 * Set the locales used to expand queries against multilingual (d:mltext)
 * properties; when null or empty, the thread's I18NUtil locale is used instead.
 *
 * @param locales the search locales, in order of preference (may be null)
 */
private void setLocales(List<Locale> locales)
{
this.locales = locales;
}
public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver)
{
this.namespacePrefixResolver = namespacePrefixResolver;
@@ -128,14 +139,14 @@ public class LuceneQueryParser extends QueryParser
pathQuery.setRepeats(true);
return pathQuery;
}
else if(field.equals("TEXT"))
else if (field.equals("TEXT"))
{
Set<QName> contentAttributes = getContentAttributes();
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
BooleanQuery query = new BooleanQuery();
for(QName qname : contentAttributes)
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = super.getFieldQuery("@"+qname.toString(), queryText);
Query part = super.getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD);
}
return query;
@@ -173,7 +184,7 @@ public class LuceneQueryParser extends QueryParser
else if (field.equals("TYPE"))
{
TypeDefinition target;
if(queryText.startsWith("{"))
if (queryText.startsWith("{"))
{
target = dictionaryService.getType(QName.createQName(queryText));
}
@@ -183,12 +194,15 @@ public class LuceneQueryParser extends QueryParser
if (colonPosition == -1)
{
// use the default namespace
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver.getNamespaceURI(""), queryText));
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver
.getNamespaceURI(""), queryText));
}
else
{
// find the prefix
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver.getNamespaceURI(queryText.substring(0, colonPosition)), queryText.substring(colonPosition + 1)));
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver
.getNamespaceURI(queryText.substring(0, colonPosition)), queryText
.substring(colonPosition + 1)));
}
}
if (target == null)
@@ -221,7 +235,7 @@ public class LuceneQueryParser extends QueryParser
else if (field.equals("ASPECT"))
{
AspectDefinition target;
if(queryText.startsWith("{"))
if (queryText.startsWith("{"))
{
target = dictionaryService.getAspect(QName.createQName(queryText));
}
@@ -231,15 +245,18 @@ public class LuceneQueryParser extends QueryParser
if (colonPosition == -1)
{
// use the default namespace
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver.getNamespaceURI(""), queryText));
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver
.getNamespaceURI(""), queryText));
}
else
{
// find the prefix
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver.getNamespaceURI(queryText.substring(0, colonPosition)), queryText.substring(colonPosition + 1)));
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver
.getNamespaceURI(queryText.substring(0, colonPosition)), queryText
.substring(colonPosition + 1)));
}
}
QName targetQName = target.getName();
HashSet<QName> subclasses = new HashSet<QName>();
for (QName classRef : dictionaryService.getAllAspects())
@@ -266,6 +283,7 @@ public class LuceneQueryParser extends QueryParser
}
else if (field.startsWith("@"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
@@ -276,7 +294,7 @@ public class LuceneQueryParser extends QueryParser
{
// use the default namespace
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
+ namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
@@ -286,21 +304,46 @@ public class LuceneQueryParser extends QueryParser
+ field.substring(colonPosition + 1);
}
}
if(expandedFieldName.endsWith(".mimetype"))
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length()-9));
QName propertyQName = QName.createQName(expandedFieldName.substring(1,
expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFieldQuery(expandedFieldName, queryText);
}
}
// Already in expanded form
return super.getFieldQuery(expandedFieldName, queryText);
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections
.singletonList(I18NUtil.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = super.getFieldQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getFieldQuery(expandedFieldName, queryText);
}
}
else
@@ -315,37 +358,6 @@ public class LuceneQueryParser extends QueryParser
}
private Set<QName> getContentAttributes()
{
HashSet<QName> contentAttributes = new HashSet<QName>();
for(QName type : dictionaryService.getAllTypes())
{
Map<QName, PropertyDefinition> props = dictionaryService.getType(type).getProperties();
for(QName prop : props.keySet())
{
if(props.get(prop).getDataType().getName().equals(DataTypeDefinition.CONTENT))
{
contentAttributes.add(prop);
}
}
}
for(QName aspect : dictionaryService.getAllAspects())
{
Map<QName, PropertyDefinition> props = dictionaryService.getAspect(aspect).getProperties();
for(QName prop : props.keySet())
{
if(props.get(prop).getDataType().getName().equals(DataTypeDefinition.CONTENT))
{
contentAttributes.add(prop);
}
}
}
return contentAttributes;
}
/**
* @exception ParseException
* throw in overridden method to disallow
@@ -415,11 +427,264 @@ public class LuceneQueryParser extends QueryParser
}
/**
 * Builds a prefix query (e.g. <code>field:term*</code>).
 * <p>
 * Behaviour mirrors {@link #getWildcardQuery(String, String)} and
 * {@link #getFuzzyQuery(String, String, float)}:
 * <ul>
 * <li>"@"-prefixed property fields have their namespace prefix expanded to the
 *     full URI form <code>@{uri}localName</code>;</li>
 * <li><code>*.mimetype</code> sub-fields of content properties are passed through
 *     unchanged;</li>
 * <li>ML text (d:mltext) properties fan out into one locale-marked sub-query per
 *     search locale, OR'ed together — the index stores ML values with a
 *     <code>\u0000locale\u0000</code> marker prefix (see the indexer);</li>
 * <li>"TEXT" fans out across every content property.</li>
 * </ul>
 *
 * @param field the field name, possibly prefixed with "@" or the pseudo-field "TEXT"
 * @param termStr the prefix text (without the trailing wildcard)
 * @return the constructed query
 * @throws ParseException if the underlying parser rejects the input
 */
@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException
{
    if (field.startsWith("@"))
    {
        // Expand prefixes
        String expandedFieldName = field;
        // Check for any prefixes and expand to the full uri
        if (field.charAt(1) != '{')
        {
            int colonPosition = field.indexOf(':');
            if (colonPosition == -1)
            {
                // use the default namespace
                expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
            }
            else
            {
                // find the prefix
                expandedFieldName = "@{"
                        + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
                        + field.substring(colonPosition + 1);
            }
        }
        // Mime type sub-field of a content property: pass straight through
        if (expandedFieldName.endsWith(".mimetype"))
        {
            QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
            PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
            if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
            {
                return super.getPrefixQuery(expandedFieldName, termStr);
            }
        }
        // ML text: build a sub-query per locale and OR the results together - the
        // analysis will take care of cross-language matching for each entry
        QName propertyQName = QName.createQName(expandedFieldName.substring(1));
        PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
        if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
        {
            BooleanQuery booleanQuery = new BooleanQuery();
            for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
                    .getLocale()) : locales))
            {
                // Prepend the \u0000locale\u0000 marker used by the indexer for ML values
                StringBuilder builder = new StringBuilder(termStr.length() + 10);
                builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
                Query subQuery = super.getPrefixQuery(expandedFieldName, builder.toString());
                booleanQuery.add(subQuery, Occur.SHOULD);
            }
            return booleanQuery;
        }
        else
        {
            return super.getPrefixQuery(expandedFieldName, termStr);
        }
    }
    else if (field.equals("TEXT"))
    {
        Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
        BooleanQuery query = new BooleanQuery();
        for (QName qname : contentAttributes)
        {
            // The super implementation will create phrase queries etc if required
            Query part = super.getPrefixQuery("@" + qname.toString(), termStr);
            query.add(part, Occur.SHOULD);
        }
        return query;
    }
    else
    {
        // BUG FIX: previously delegated to super.getFieldQuery(field, termStr),
        // which silently dropped the prefix (wildcard) semantics for plain fields.
        // The sibling getWildcardQuery/getFuzzyQuery overrides delegate to their
        // own super variant; do the same here.
        return super.getPrefixQuery(field, termStr);
    }
}
/**
 * Builds a wildcard query (e.g. <code>field:te?m*</code>).
 * <p>
 * "@"-prefixed property fields have their namespace prefix expanded to the full
 * URI form; <code>*.mimetype</code> sub-fields of content properties pass through
 * unchanged; ML text (d:mltext) properties fan out into one locale-marked
 * sub-query per search locale; the pseudo-field "TEXT" fans out across every
 * content property.
 *
 * @param field the field name, possibly "@"-prefixed or the pseudo-field "TEXT"
 * @param termStr the wildcard pattern
 * @return the constructed query
 * @throws ParseException if the underlying parser rejects the input
 */
@Override
protected Query getWildcardQuery(String field, String termStr) throws ParseException
{
if (field.startsWith("@"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type sub-field of a content property: pass straight through
if (expandedFieldName.endsWith(".mimetype"))
{
// strip the leading "@" and trailing ".mimetype" (9 chars) to recover the property QName
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getWildcardQuery(expandedFieldName, termStr);
}
}
// Field name is now in expanded form; check for an ML text property
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub-query for each locale and OR the results together - the analysis will take care of
// cross-language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
// Prepend the \u0000locale\u0000 marker used by the indexer for ML values
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getWildcardQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getWildcardQuery(expandedFieldName, termStr);
}
}
else if (field.equals("TEXT"))
{
// Fan the wildcard out across all registered content properties
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = super.getWildcardQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD);
}
return query;
}
else
{
return super.getWildcardQuery(field, termStr);
}
}
/**
 * Builds a fuzzy query (e.g. <code>field:term~</code>).
 * <p>
 * Mirrors {@link #getWildcardQuery(String, String)}: "@"-prefixed property
 * fields are expanded to full-URI form; <code>*.mimetype</code> sub-fields of
 * content properties pass through unchanged; ML text (d:mltext) properties fan
 * out into one locale-marked sub-query per search locale; "TEXT" fans out
 * across every content property.
 *
 * @param field the field name, possibly "@"-prefixed or the pseudo-field "TEXT"
 * @param termStr the fuzzy term
 * @param minSimilarity the minimum similarity threshold passed through to Lucene
 * @return the constructed query
 * @throws ParseException if the underlying parser rejects the input
 */
@Override
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
if (field.startsWith("@"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type sub-field of a content property: pass straight through
if (expandedFieldName.endsWith(".mimetype"))
{
// strip the leading "@" and trailing ".mimetype" (9 chars) to recover the property QName
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity);
}
}
// Field name is now in expanded form; check for an ML text property
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub-query for each locale and OR the results together - the analysis will take care of
// cross-language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
// Prepend the \u0000locale\u0000 marker used by the indexer for ML values
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getFuzzyQuery(expandedFieldName, builder.toString(), minSimilarity);
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity);
}
}
else if (field.equals("TEXT"))
{
// Fan the fuzzy term out across all registered content properties
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = super.getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
query.add(part, Occur.SHOULD);
}
return query;
}
else
{
return super.getFuzzyQuery(field, termStr, minSimilarity);
}
}
/**
 * Set the dictionary service used to resolve property, type and aspect
 * definitions when expanding field queries.
 *
 * @param dictionaryService the dictionary service
 */
public void setDictionaryService(DictionaryService dictionaryService)
{
this.dictionaryService = dictionaryService;
}
}

View File

@@ -1,28 +0,0 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.search.impl.lucene;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.search.SearchService;
import org.alfresco.service.namespace.NamespacePrefixResolver;
public interface LuceneSearcher extends SearchService, Lockable
{
public boolean indexExists();
public void setNodeService(NodeService nodeService);
public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver);
}

View File

@@ -102,7 +102,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
searcher.setLuceneConfig(config);
try
{
searcher.initialise(storeRef, indexer == null ? null : indexer.getDeltaId(), false, false);
searcher.initialise(storeRef, indexer == null ? null : indexer.getDeltaId());
searcher.indexer = indexer;
}
catch (LuceneIndexException e)
@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
}
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
dictionaryService), namespacePrefixResolver, dictionaryService, defaultOperator);
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters.getLocales());
ClosingIndexSearcher searcher = getSearcher(indexer);
if (searcher == null)
{

View File

@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
@@ -43,11 +44,9 @@ import org.alfresco.repo.dictionary.NamespaceDAOImpl;
import org.alfresco.repo.node.BaseNodeServiceTest;
import org.alfresco.repo.search.QueryParameterDefImpl;
import org.alfresco.repo.search.QueryRegisterComponent;
import org.alfresco.repo.search.impl.lucene.analysis.NumericEncoder;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
import org.alfresco.repo.search.results.ChildAssocRefResultSet;
import org.alfresco.repo.search.results.DetachedResultSet;
import org.alfresco.repo.search.transaction.LuceneIndexLock;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.service.ServiceRegistry;
@@ -57,6 +56,7 @@ import org.alfresco.service.cmr.repository.ChildAssociationRef;
import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MLText;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.Path;
@@ -83,7 +83,6 @@ import org.springframework.context.ApplicationContext;
/**
* @author andyh
*
*/
@SuppressWarnings("unused")
public class LuceneTest2 extends TestCase
@@ -100,11 +99,11 @@ public class LuceneTest2 extends TestCase
QName createdDate = QName.createQName(TEST_NAMESPACE, "createdDate");
QName orderDouble = QName.createQName(TEST_NAMESPACE, "orderDouble");
QName orderFloat = QName.createQName(TEST_NAMESPACE, "orderFloat");
QName orderLong = QName.createQName(TEST_NAMESPACE, "orderLong");
QName orderInt = QName.createQName(TEST_NAMESPACE, "orderInt");
TransactionService transactionService;
@@ -113,8 +112,6 @@ public class LuceneTest2 extends TestCase
DictionaryService dictionaryService;
LuceneIndexLock luceneIndexLock;
private NodeRef rootNodeRef;
private NodeRef n1;
@@ -183,7 +180,6 @@ public class LuceneTest2 extends TestCase
public void setUp() throws Exception
{
nodeService = (NodeService) ctx.getBean("dbNodeService");
luceneIndexLock = (LuceneIndexLock) ctx.getBean("luceneIndexLock");
dictionaryService = (DictionaryService) ctx.getBean("dictionaryService");
dictionaryDAO = (DictionaryDAO) ctx.getBean("dictionaryDAO");
luceneFTS = (FullTextSearchIndexer) ctx.getBean("LuceneFullTextSearchIndexer");
@@ -193,31 +189,28 @@ public class LuceneTest2 extends TestCase
indexerAndSearcher = (LuceneIndexerAndSearcher) ctx.getBean("luceneIndexerAndSearcherFactory");
transactionService = (TransactionService) ctx.getBean("transactionComponent");
serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
namespaceDao = (NamespaceDAOImpl) ctx.getBean("namespaceDAO");
namespaceDao = (NamespaceDAOImpl) ctx.getBean("namespaceDAO");
this.authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
queryRegisterComponent.loadQueryCollection("testQueryRegister.xml");
assertEquals(true, ctx.isSingleton("luceneIndexLock"));
assertEquals(true, ctx.isSingleton("LuceneFullTextSearchIndexer"));
testTX = transactionService.getUserTransaction();
testTX.begin();
this.authenticationComponent.setSystemUserAsCurrentUser();
// load in the test model
ClassLoader cl = BaseNodeServiceTest.class.getClassLoader();
InputStream modelStream = cl.getResourceAsStream("org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml");
assertNotNull(modelStream);
M2Model model = M2Model.createModel(modelStream);
dictionaryDAO.putModel(model);
namespaceDao.addPrefix("test", TEST_NAMESPACE);
StoreRef storeRef = nodeService.createStore(StoreRef.PROTOCOL_WORKSPACE, "Test_" + System.currentTimeMillis());
rootNodeRef = nodeService.getRootNode(storeRef);
@@ -276,6 +269,20 @@ public class LuceneTest2 extends TestCase
testProperties.put(QName.createQName(TEST_NAMESPACE, "path-ista"), nodeService.getPath(n3));
testProperties.put(QName.createQName(TEST_NAMESPACE, "null"), null);
testProperties.put(QName.createQName(TEST_NAMESPACE, "list"), new ArrayList());
MLText mlText = new MLText();
mlText.addValue(Locale.ENGLISH, "banana");
mlText.addValue(Locale.FRENCH, "banane");
mlText.addValue(Locale.CHINESE, "香蕉");
mlText.addValue(new Locale("nl"), "banaan");
mlText.addValue(Locale.GERMAN, "banane");
mlText.addValue(new Locale("el"), "μπανάνα");
mlText.addValue(Locale.ITALIAN, "banana");
mlText.addValue(new Locale("ja"), "バナナ");
mlText.addValue(new Locale("ko"), "바나나");
mlText.addValue(new Locale("pt"), "banana");
mlText.addValue(new Locale("ru"), "банан");
mlText.addValue(new Locale("es"), "plátano");
testProperties.put(QName.createQName(TEST_NAMESPACE, "ml"), mlText);
ArrayList<Object> testList = new ArrayList<Object>();
testList.add(null);
testProperties.put(QName.createQName(TEST_NAMESPACE, "nullList"), testList);
@@ -341,11 +348,11 @@ public class LuceneTest2 extends TestCase
private double orderDoubleCount = -0.11d;
private Date orderDate = new Date();
private float orderFloatCount = -3.5556f;
private long orderLongCount = -1999999999999999l;
private int orderIntCount = -45764576;
public Map<QName, Serializable> getOrderProperties()
@@ -385,7 +392,7 @@ public class LuceneTest2 extends TestCase
{
testSort();
}
public void test0() throws Exception
{
luceneFTS.pause();
@@ -860,7 +867,7 @@ public class LuceneTest2 extends TestCase
{
Date currentBun = DefaultTypeConverter.INSTANCE.convert(Date.class, nodeService.getProperty(row
.getNodeRef(), createdDate));
//System.out.println(currentBun);
// System.out.println(currentBun);
if (date != null)
{
assertTrue(date.compareTo(currentBun) <= 0);
@@ -881,7 +888,7 @@ public class LuceneTest2 extends TestCase
{
Date currentBun = DefaultTypeConverter.INSTANCE.convert(Date.class, nodeService.getProperty(row
.getNodeRef(), createdDate));
//System.out.println(currentBun);
// System.out.println(currentBun);
if ((date != null) && (currentBun != null))
{
assertTrue(date.compareTo(currentBun) >= 0);
@@ -904,7 +911,7 @@ public class LuceneTest2 extends TestCase
{
Double currentBun = DefaultTypeConverter.INSTANCE.convert(Double.class, nodeService.getProperty(row
.getNodeRef(), orderDouble));
//System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
// System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
if (d != null)
{
assertTrue(d.compareTo(currentBun) <= 0);
@@ -925,7 +932,7 @@ public class LuceneTest2 extends TestCase
{
Double currentBun = DefaultTypeConverter.INSTANCE.convert(Double.class, nodeService.getProperty(row
.getNodeRef(), orderDouble));
//System.out.println(currentBun);
// System.out.println(currentBun);
if ((d != null) && (currentBun != null))
{
assertTrue(d.compareTo(currentBun) >= 0);
@@ -933,7 +940,7 @@ public class LuceneTest2 extends TestCase
d = currentBun;
}
results.close();
// sort by float
SearchParameters sp11 = new SearchParameters();
@@ -948,7 +955,7 @@ public class LuceneTest2 extends TestCase
{
Float currentBun = DefaultTypeConverter.INSTANCE.convert(Float.class, nodeService.getProperty(row
.getNodeRef(), orderFloat));
//System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
// System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
if (f != null)
{
assertTrue(f.compareTo(currentBun) <= 0);
@@ -969,7 +976,7 @@ public class LuceneTest2 extends TestCase
{
Float currentBun = DefaultTypeConverter.INSTANCE.convert(Float.class, nodeService.getProperty(row
.getNodeRef(), orderFloat));
//System.out.println(currentBun);
// System.out.println(currentBun);
if ((f != null) && (currentBun != null))
{
assertTrue(f.compareTo(currentBun) >= 0);
@@ -977,7 +984,7 @@ public class LuceneTest2 extends TestCase
f = currentBun;
}
results.close();
// sort by long
SearchParameters sp13 = new SearchParameters();
@@ -992,7 +999,7 @@ public class LuceneTest2 extends TestCase
{
Long currentBun = DefaultTypeConverter.INSTANCE.convert(Long.class, nodeService.getProperty(row
.getNodeRef(), orderLong));
//System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
// System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
if (l != null)
{
assertTrue(l.compareTo(currentBun) <= 0);
@@ -1013,7 +1020,7 @@ public class LuceneTest2 extends TestCase
{
Long currentBun = DefaultTypeConverter.INSTANCE.convert(Long.class, nodeService.getProperty(row
.getNodeRef(), orderLong));
//System.out.println(currentBun);
// System.out.println(currentBun);
if ((l != null) && (currentBun != null))
{
assertTrue(l.compareTo(currentBun) >= 0);
@@ -1036,7 +1043,7 @@ public class LuceneTest2 extends TestCase
{
Integer currentBun = DefaultTypeConverter.INSTANCE.convert(Integer.class, nodeService.getProperty(row
.getNodeRef(), orderInt));
//System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
// System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun);
if (i != null)
{
assertTrue(i.compareTo(currentBun) <= 0);
@@ -1057,7 +1064,7 @@ public class LuceneTest2 extends TestCase
{
Integer currentBun = DefaultTypeConverter.INSTANCE.convert(Integer.class, nodeService.getProperty(row
.getNodeRef(), orderInt));
//System.out.println(currentBun);
// System.out.println(currentBun);
if ((i != null) && (currentBun != null))
{
assertTrue(i.compareTo(currentBun) >= 0);
@@ -1065,10 +1072,9 @@ public class LuceneTest2 extends TestCase
i = currentBun;
}
results.close();
luceneFTS.resume();
SearchParameters sp17 = new SearchParameters();
sp17.addStore(rootNodeRef.getStoreRef());
sp17.setLanguage(SearchService.LANGUAGE_LUCENE);
@@ -1076,7 +1082,7 @@ public class LuceneTest2 extends TestCase
sp17.addSort("cabbage", false);
results = searcher.query(sp17);
results.close();
luceneFTS.resume();
}
@@ -1141,7 +1147,7 @@ public class LuceneTest2 extends TestCase
+ System.currentTimeMillis() + "_1", indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -1155,7 +1161,6 @@ public class LuceneTest2 extends TestCase
* Test basic index and search
*
* @throws InterruptedException
*
*/
public void testStandAloneIndexerCommit() throws Exception
@@ -1165,7 +1170,7 @@ public class LuceneTest2 extends TestCase
+ System.currentTimeMillis() + "_1", indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -1356,7 +1361,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis() + "_" + (new Random().nextInt()), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -1885,9 +1890,9 @@ public class LuceneTest2 extends TestCase
null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testType.toPrefixString(namespacePrefixResolver) + "\"", null,
null);
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\""
+ testType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(1, results.length());
results.close();
@@ -1895,27 +1900,27 @@ public class LuceneTest2 extends TestCase
null, null);
assertEquals(13, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testSuperType.toPrefixString(namespacePrefixResolver) + "\"",
null, null);
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\""
+ testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(13, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
+ testAspect.toString() + "\"", null, null);
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\"" + testAspect.toString() + "\"", null,
null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
+ testAspect.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
+ testAspect.toString() + "\"", null, null);
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\"" + testAspect.toString() + "\"", null,
null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
+ testAspect.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(1, results.length());
@@ -1943,6 +1948,193 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
// Direct ML tests
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
SearchParameters sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp.addLocale(Locale.UK);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.ENGLISH);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane");
sp.addLocale(Locale.FRENCH);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":香蕉");
sp.addLocale(Locale.CHINESE);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banaan");
sp.addLocale(new Locale("nl"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane");
sp.addLocale(Locale.GERMAN);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":μπανάνα");
sp.addLocale(new Locale("el"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.ITALIAN);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":バナナ");
sp.addLocale(new Locale("ja"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":바나나");
sp.addLocale(new Locale("ko"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(new Locale("pt"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":банан");
sp.addLocale(new Locale("ru"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":plátano");
sp.addLocale(new Locale("es"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Test non field queries
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*ox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":*ox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":*ox", null, null);
assertEquals(1, results.length());
results.close();
// Parameters
queryQName = QName.createQName("alf:test2", namespacePrefixResolver);
@@ -2088,7 +2280,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2124,7 +2316,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2361,7 +2553,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2381,7 +2573,7 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
}
public void testNumericInPath() throws Exception
{
String COMPLEX_LOCAL_NAME = "Woof12";
@@ -2393,7 +2585,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2423,7 +2615,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2661,7 +2853,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2904,7 +3096,7 @@ public class LuceneTest2 extends TestCase
indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta" + System.currentTimeMillis(),
indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -2948,7 +3140,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis(), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -3017,7 +3209,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis() + "_" + (new Random().nextInt()), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -3096,7 +3288,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis() + "_" + (new Random().nextInt()), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);
@@ -3429,7 +3621,7 @@ public class LuceneTest2 extends TestCase
LuceneIndexerImpl2 indexer = LuceneIndexerImpl2.getUpdateIndexer(rootNodeRef.getStoreRef(), "delta"
+ System.currentTimeMillis() + "_" + (new Random().nextInt()), indexerAndSearcher);
indexer.setNodeService(nodeService);
//indexer.setLuceneIndexLock(luceneIndexLock);
// indexer.setLuceneIndexLock(luceneIndexLock);
indexer.setDictionaryService(dictionaryService);
indexer.setLuceneFullTextSearchIndexer(luceneFTS);
indexer.setContentService(contentService);

View File

@@ -228,6 +228,16 @@
<stored>true</stored>
<tokenised>false</tokenised>
</index>
</property>
<property name="test:ml">
<type>d:mltext</type>
<mandatory>true</mandatory>
<multiple>false</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
</properties>
<mandatory-aspects>

View File

@@ -0,0 +1,37 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.alfresco.error.AlfrescoRuntimeException;
/**
 * Runtime exception thrown when Lucene text analysis fails, e.g. when a
 * multilingual token stream cannot be read or reset.
 *
 * Mirrors the standard {@code AlfrescoRuntimeException} constructors so that
 * message ids, message parameters and causes can all be supplied.
 */
public class AnalysisException extends AlfrescoRuntimeException
{
    private static final long serialVersionUID = -7722380192490118459L;

    /**
     * @param msgId message id of the error
     */
    public AnalysisException(String msgId)
    {
        super(msgId);
    }

    /**
     * @param msgId     message id of the error
     * @param msgParams parameters substituted into the message
     */
    public AnalysisException(String msgId, Object[] msgParams)
    {
        super(msgId, msgParams);
    }

    /**
     * @param msgId message id of the error
     * @param cause underlying cause
     */
    public AnalysisException(String msgId, Throwable cause)
    {
        super(msgId, cause);
    }

    /**
     * @param msgId     message id of the error
     * @param msgParams parameters substituted into the message
     * @param cause     underlying cause
     */
    public AnalysisException(String msgId, Object[] msgParams, Throwable cause)
    {
        super(msgId, msgParams, cause);
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Danish" stemmer.
 */
public class DanishSnowballAnalyser extends SnowballAnalyzer
{
    // Configures the underlying SnowballAnalyzer with the Danish stemmer.
    public DanishSnowballAnalyser()
    {
        super("Danish");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Dutch" stemmer.
 */
public class DutchSnowballAnalyser extends SnowballAnalyzer
{
    public DutchSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Dutch text.
        super("Dutch");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "English" stemmer.
 */
public class EnglishSnowballAnalyser extends SnowballAnalyzer
{
    public EnglishSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to English text.
        super("English");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "French" stemmer.
 */
public class FrenchSnowballAnalyser extends SnowballAnalyzer
{
    public FrenchSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to French text.
        super("French");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "German2" stemmer
 * (the German stemmer variant that also handles umlaut transcriptions).
 */
public class German2SnowballAnalyser extends SnowballAnalyzer
{
    public German2SnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to German text.
        super("German2");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "German" stemmer.
 */
public class GermanSnowballAnalyser extends SnowballAnalyzer
{
    public GermanSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to German text.
        super("German");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Italian" stemmer.
 */
public class ItalianSnowballAnalyser extends SnowballAnalyzer
{
    public ItalianSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Italian text.
        super("Italian");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Kp"
 * (Kraaij-Pohlmann, Dutch) stemmer.
 */
public class KPSnowballAnalyser extends SnowballAnalyzer
{
    public KPSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer. "Kp" is the Snowball name of the
        // Kraaij-Pohlmann stemmer.
        super("Kp");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Lovins"
 * (English, Lovins algorithm) stemmer.
 */
public class LovinsSnowballAnalyser extends SnowballAnalyzer
{
    public LovinsSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer.
        super("Lovins");
    }
}

View File

@@ -0,0 +1,191 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Locale;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.impl.lucene.LuceneQueryParser;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
/**
 * Analyser for d:mltext properties.
 *
 * The indexed value may be prefixed with a locale marker of the form
 * {@code \u0000language_country_variant\u0000} (written by the indexer).
 * This analyser reads that marker ahead of tokenisation, picks the analyser
 * configured for the locale via the {@link DictionaryService}, and wraps the
 * resulting stream in a {@link MLTokenDuplicator} so that both plain and
 * locale-prefixed tokens are emitted. If no marker is present the analyser
 * for the current thread's default locale is used.
 *
 * NOTE: class name "MLAnalayser" is misspelt but kept for compatibility with
 * existing references.
 */
public class MLAnalayser extends Analyzer
{
    private static Logger s_logger = Logger.getLogger(MLAnalayser.class);

    // Used to look up the analyser class configured for d:text in a given locale.
    private DictionaryService dictionaryService;

    // Cache of locale -> analyser so each analyser class is instantiated once.
    // NOTE(review): plain HashMap, not synchronised - assumes single-threaded
    // access or benign races; confirm against callers.
    private HashMap<Locale, Analyzer> analysers = new HashMap<Locale, Analyzer>();

    public MLAnalayser(DictionaryService dictionaryService)
    {
        this.dictionaryService = dictionaryService;
    }

    /**
     * Tokenise the field, honouring a leading locale marker if present.
     *
     * @param fieldName the Lucene field being analysed
     * @param reader    source text, optionally starting with a locale marker
     * @return token stream produced by the locale-specific analyser, wrapped
     *         to duplicate tokens with locale prefixes; or the default
     *         analyser's stream when no valid marker is found
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader)
    {
        // We use read-ahead to get the language info - if it is not present we
        // need to restart and use the default - therefore we need mark/reset.
        if (!(reader instanceof BufferedReader))
        {
            BufferedReader breader = new BufferedReader(reader);
            try
            {
                if (!breader.markSupported())
                {
                    throw new AnalysisException(
                            "Multilingual tokenisation requires a reader that supports marks and reset");
                }
                // Mark enough to read back past the locale marker (<= 100 chars).
                breader.mark(100);
                StringBuilder builder = new StringBuilder();
                if (breader.read() == '\u0000')
                {
                    String language = "";
                    String country = "";
                    String varient = "";
                    char c;
                    int count = 0;
                    // Read "language_country_variant" up to the closing \u0000.
                    while ((c = (char) breader.read()) != '\u0000')
                    {
                        if (count++ > 99)
                        {
                            // Marker never terminated within the mark limit -
                            // treat the whole input as plain text.
                            breader.reset();
                            return getDefaultAnalyser().tokenStream(fieldName, breader);
                        }
                        if (c == '_')
                        {
                            // '_' closes the current locale component; fill
                            // language, then country, then variant, in order.
                            if (language.length() == 0)
                            {
                                language = builder.toString();
                            }
                            else if (country.length() == 0)
                            {
                                country = builder.toString();
                            }
                            else if (varient.length() == 0)
                            {
                                varient = builder.toString();
                            }
                            else
                            {
                                // More than three components - not a locale.
                                breader.reset();
                                return getDefaultAnalyser().tokenStream(fieldName, breader);
                            }
                            builder = new StringBuilder();
                        }
                        else
                        {
                            builder.append(c);
                        }
                    }
                    // Assign the trailing component (no '_' after the last one).
                    if (builder.length() > 0)
                    {
                        if (language.length() == 0)
                        {
                            language = builder.toString();
                        }
                        else if (country.length() == 0)
                        {
                            country = builder.toString();
                        }
                        else if (varient.length() == 0)
                        {
                            varient = builder.toString();
                        }
                        else
                        {
                            breader.reset();
                            return getDefaultAnalyser().tokenStream(fieldName, breader);
                        }
                    }
                    Locale locale = new Locale(language, country, varient);
                    // leave the reader where it is (just past the marker) so the
                    // locale-specific analyser sees only the real text ....
                    return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader);
                }
                else
                {
                    // No marker: rewind and analyse with the default locale.
                    breader.reset();
                    return getDefaultAnalyser().tokenStream(fieldName, breader);
                }
            }
            catch (IOException io)
            {
                // Read-ahead failed; fall back to the default analyser from the
                // start of the stream.
                try
                {
                    breader.reset();
                }
                catch (IOException e)
                {
                    throw new AnalysisException("Failed to reset buffered reader - token stream will be invalid", e);
                }
                return getDefaultAnalyser().tokenStream(fieldName, breader);
            }
        }
        else
        {
            // NOTE(review): this branch rejects readers that are ALREADY
            // BufferedReaders, which looks inverted - the message suggests a
            // buffered reader is required, yet only unbuffered readers are
            // accepted (and wrapped above). Confirm intent with callers.
            throw new AnalysisException("Multilingual tokenisation requires a buffered reader");
        }
    }

    // Analyser for the current thread's locale (I18NUtil context).
    private Analyzer getDefaultAnalyser()
    {
        return getAnalyser(I18NUtil.getLocale());
    }

    // Cached lookup of the analyser for a locale.
    private Analyzer getAnalyser(Locale locale)
    {
        Analyzer analyser = (Analyzer) analysers.get(locale);
        if (analyser == null)
        {
            analyser = findAnalyser(locale);
        }
        // wrap analyser to produce plain and prefixed tokens
        return analyser;
    }

    // Load and cache the analyser for a locale.
    private Analyzer findAnalyser(Locale locale)
    {
        Analyzer analyser = loadAnalyzer(locale);
        analysers.put(locale, analyser);
        return analyser;
    }

    /**
     * Instantiate the analyser class configured (via the data dictionary) for
     * d:text in the given locale.
     */
    private Analyzer loadAnalyzer(Locale locale)
    {
        DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
        String analyserClassName = dataType.getAnalyserClassName(locale);
        if (s_logger.isDebugEnabled())
        {
            s_logger.debug("Loading " + analyserClassName + " for " + locale);
        }
        try
        {
            Class<?> clazz = Class.forName(analyserClassName);
            Analyzer analyser = (Analyzer) clazz.newInstance();
            return analyser;
        }
        catch (ClassNotFoundException e)
        {
            throw new RuntimeException("Unable to load analyser for property of type "
                    + dataType.getName() + " using " + analyserClassName);
        }
        catch (InstantiationException e)
        {
            throw new RuntimeException("Unable to load analyser for property of type "
                    + dataType.getName() + " using " + analyserClassName);
        }
        catch (IllegalAccessException e)
        {
            throw new RuntimeException("Unable to load analyser for property of type "
                    + dataType.getName() + " using " + analyserClassName);
        }
    }
}

View File

@@ -0,0 +1,122 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
/**
* Create duplicate tokens for multilingual varients
*
* The forms are
*
* Tokens:
* Token - all languages
* {fr}Token - if a language is specified
* {fr_CA}Token - if a language and country is specified
* {fr_CA_Varient}Token - for all three
* {fr__Varient}Token - for a language varient with no country
*
* @author andyh
*
*/
/**
 * Create duplicate tokens for multilingual variants.
 *
 * For each source token the following forms are emitted:
 *
 * Tokens:
 *    Token                 - all languages
 *    {fr}Token             - if a language is specified
 *    {fr_CA}Token          - if a language and country is specified
 *    {fr_CA_Variant}Token  - for all three
 *    {fr__Variant}Token    - for a language variant with no country
 *
 * The duplicates share the source token's offsets and are emitted at the
 * same position (position increment 0) so they behave as synonyms.
 *
 * @author andyh
 */
public class MLTokenDuplicator extends Tokenizer
{
    // Underlying stream whose tokens are duplicated.
    TokenStream source;

    Locale locale;

    // Iterator over the duplicates of the current source token; null when a
    // new source token must be fetched.
    Iterator<Token> it;

    // Locale prefixes ("" plus "{lang}", "{lang_country}", ...) applied to
    // each source token. Built once in the constructor.
    ArrayList<String> prefixes;

    public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader)
    {
        super(reader);
        this.source = source;
        this.locale = locale;

        boolean l = locale.getLanguage().length() != 0;
        boolean c = locale.getCountry().length() != 0;
        boolean v = locale.getVariant().length() != 0;

        prefixes = new ArrayList<String>(4);
        // Always emit the unprefixed token.
        prefixes.add("");

        if (l)
        {
            // Build "{lang}" then extend the same buffer to "{lang_country}"
            // and "{lang_country_variant}"; the trailing '}' is removed before
            // each extension.
            StringBuffer result = new StringBuffer();
            result.append("{").append(locale.getLanguage()).append("}");
            prefixes.add(result.toString());
            result.deleteCharAt(result.length()-1);

            if (c || (l && v))
            {
                // NOTE(review): when the country is empty but a variant is
                // present this appends an empty country, yielding "{lang_}" -
                // presumably deliberate to produce the "{fr__Variant}" form.
                result.append('_').append(locale.getCountry()).append("}");
                prefixes.add(result.toString());
                result.deleteCharAt(result.length()-1);
            }
            if (v && (l || c))
            {
                result.append('_').append(locale.getVariant()).append("}");
                prefixes.add(result.toString());
            }
        }
    }

    /**
     * Return the next duplicated token, fetching and expanding a new source
     * token once the current duplicates are exhausted. Returns null at end of
     * stream.
     */
    @Override
    public Token next() throws IOException
    {
        if (it == null)
        {
            it = buildIterator();
        }
        if (it == null)
        {
            return null;
        }
        if(it.hasNext())
        {
            return it.next();
        }
        else
        {
            // Current token's duplicates are used up - recurse to fetch the
            // next source token.
            it = null;
            return this.next();
        }
    }

    /**
     * Fetch the next source token and build its prefixed duplicates.
     * Returns null at end of the source stream.
     */
    private Iterator<Token> buildIterator() throws IOException
    {
        Token token = source.next();
        if (token == null)
        {
            return null;
        }

        ArrayList<Token> tokens = new ArrayList<Token>(prefixes.size());
        for(String prefix : prefixes)
        {
            Token newToken = new Token(prefix+token.termText(), token.startOffset(), token.endOffset(), token.type());
            if(tokens.size() == 0)
            {
                // First duplicate keeps the source position increment ...
                newToken.setPositionIncrement(token.getPositionIncrement());
            }
            else
            {
                // ... the rest stack at the same position, like synonyms.
                newToken.setPositionIncrement(0);
            }
            tokens.add(newToken);
        }
        return tokens.iterator();
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Norwegian" stemmer.
 */
public class NorwegianSnowballAnalyser extends SnowballAnalyzer
{
    public NorwegianSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Norwegian text.
        super("Norwegian");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Porter"
 * (classic English Porter algorithm) stemmer.
 */
public class PorterSnowballAnalyser extends SnowballAnalyzer
{
    public PorterSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer.
        super("Porter");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Portuguese" stemmer.
 */
public class PortugueseSnowballAnalyser extends SnowballAnalyzer
{
    public PortugueseSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Portuguese text.
        super("Portuguese");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Russian" stemmer.
 */
public class RussianSnowballAnalyser extends SnowballAnalyzer
{
    public RussianSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Russian text.
        super("Russian");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Spanish" stemmer.
 */
public class SpanishSnowballAnalyser extends SnowballAnalyzer
{
    public SpanishSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Spanish text.
        super("Spanish");
    }
}

View File

@@ -0,0 +1,12 @@
package org.alfresco.repo.search.impl.lucene.analysis;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
/**
 * Analyser that tokenises text and applies the Snowball "Swedish" stemmer.
 */
public class SwedishSnowballAnalyser extends SnowballAnalyzer
{
    public SwedishSnowballAnalyser()
    {
        // Fixed copy/paste error: previously passed "Danish", which applied
        // the wrong stemmer to Swedish text.
        super("Swedish");
    }
}

View File

@@ -1,71 +0,0 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.search.transaction;
import java.util.HashMap;
import java.util.concurrent.locks.ReentrantLock;
import org.alfresco.service.cmr.repository.StoreRef;
/**
 * Per-store write locking for Lucene index deletion and update.
 *
 * Read "locks" are no-ops: only writers are serialised, one fair
 * ReentrantLock per StoreRef, created lazily on first write-lock request.
 *
 * NOTE(review): releaseWriteLock() silently does nothing when no lock exists
 * for the store, and ReentrantLock.unlock() will throw if the calling thread
 * does not hold the lock - callers must pair get/release on the same thread.
 */
public class LuceneIndexLock
{
    // Lazily-populated map of store -> fair write lock; guarded by
    // synchronized(locks) for map access (the lock itself is taken outside).
    private HashMap<StoreRef, ReentrantLock> locks = new HashMap<StoreRef, ReentrantLock> ();

    public LuceneIndexLock()
    {
        super();
    }

    /**
     * No-op: readers are not serialised against writers.
     */
    public void getReadLock(StoreRef ref)
    {
        return;
    }

    /**
     * No-op: see {@link #getReadLock(StoreRef)}.
     */
    public void releaseReadLock(StoreRef ref)
    {
        return;
    }

    /**
     * Block until the calling thread holds the (fair, reentrant) write lock
     * for the given store, creating the lock on first use.
     */
    public void getWriteLock(StoreRef ref)
    {
        ReentrantLock lock;
        synchronized(locks)
        {
            lock = locks.get(ref);
            if(lock == null)
            {
                lock = new ReentrantLock(true);
                locks.put(ref, lock);
            }
        }
        // Acquire outside the map monitor so lookups never block on writers.
        lock.lock();
    }

    /**
     * Release the store's write lock if one has been created; silently a
     * no-op when the store was never write-locked.
     */
    public void releaseWriteLock(StoreRef ref)
    {
        ReentrantLock lock;
        synchronized(locks)
        {
            lock = locks.get(ref);
        }
        if(lock != null)
        {
            lock.unlock();
        }
    }
}

View File

@@ -170,6 +170,40 @@ public interface DictionaryService
@NotAuditable
PropertyDefinition getProperty(QName propertyName);
/**
* Get all properties defined across all models with the given data type.
*
* Note that DataTypeDefinition.ANY will only match this type and can not be used as get all properties.
*
* If dataType is null then this method will return *ALL* properties regardless of data type.
*
* @param dataType
* @return
*/
@NotAuditable
Collection<QName> getAllProperties(QName dataType);
/**
* Get all properties defined for the given model with the given data type.
*
* Note that DataTypeDefinition.ANY will only match this type and can not be used as get all properties.
*
* If dataType is null then this method will return *ALL* properties regardless of data type.
*
* @param dataType
* @return
*/
@NotAuditable
Collection<QName> getProperties(QName model, QName dataType);
/**
* Get all properties for the specified model
*
* @param model
* @return
*/
Collection<QName> getProperties(QName model);
/**
* Gets the definition of the association as defined by its owning Class.
*

View File

@@ -17,18 +17,18 @@
package org.alfresco.service.cmr.search;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.service.cmr.repository.Path;
import org.alfresco.service.cmr.repository.StoreRef;
/**
* This class provides parameters to define a search.
*
* TODO
* - paging of results page number and page size
* - paging isolation - REPEATABLE READ, READ COMMITTED, may SEE ONCE tracking node refs in previous result sets
* - how long repeatable read may be held
* - limit by the number of permission evaluations
* This class provides parameters to define a search. TODO - paging of results page number and page size - paging
* isolation - REPEATABLE READ, READ COMMITTED, may SEE ONCE tracking node refs in previous result sets - how long
* repeatable read may be held - limit by the number of permission evaluations
*
* @author Andy Hind
*/
@@ -38,71 +38,97 @@ public class SearchParameters extends SearchStatement
* The default limit if someone asks for a limited result set but does not say how to limit....
*/
private static int DEFAULT_LIMIT = 500;
/*
* Standard sort definitions for sorting in document and score order.
*/
public static final SortDefinition SORT_IN_DOCUMENT_ORDER_ASCENDING = new SortDefinition(SortDefinition.SortType.DOCUMENT, null, true);
public static final SortDefinition SORT_IN_DOCUMENT_ORDER_DESCENDING = new SortDefinition(SortDefinition.SortType.DOCUMENT, null, false);
public static final SortDefinition SORT_IN_SCORE_ORDER_ASCENDING = new SortDefinition(SortDefinition.SortType.SCORE, null, false);
public static final SortDefinition SORT_IN_SCORE_ORDER_DESCENDING = new SortDefinition(SortDefinition.SortType.SCORE, null, true);
public static final SortDefinition SORT_IN_DOCUMENT_ORDER_ASCENDING = new SortDefinition(
SortDefinition.SortType.DOCUMENT, null, true);
public static final SortDefinition SORT_IN_DOCUMENT_ORDER_DESCENDING = new SortDefinition(
SortDefinition.SortType.DOCUMENT, null, false);
public static final SortDefinition SORT_IN_SCORE_ORDER_ASCENDING = new SortDefinition(
SortDefinition.SortType.SCORE, null, false);
public static final SortDefinition SORT_IN_SCORE_ORDER_DESCENDING = new SortDefinition(
SortDefinition.SortType.SCORE, null, true);
/**
* An emum defining if the default action is to "and" or "or" unspecified components in the query register.
* Not all search implementations will support this.
* An enum defining if the default action is to "and" or "or" unspecified components in the query register. Not all
* search implementations will support this.
*/
public enum Operator
{
OR, AND
}
/*
 * Expose as constants
 */
public static final Operator OR = Operator.OR;
public static final Operator AND = Operator.AND;
/*
 * The parameters that can be set
 */
// Stores to search; currently limited to exactly one (see addStore).
private ArrayList<StoreRef> stores = new ArrayList<StoreRef>(1);
// Attribute paths guaranteed to be present in the result set (akin to SQL column selection).
private ArrayList<Path> attributePaths = new ArrayList<Path>(1);
// Parameter definitions used to parameterise the query string.
private ArrayList<QueryParameterDefinition> queryParameterDefinitions = new ArrayList<QueryParameterDefinition>(1);
// When true, uncommitted data in the current transaction is excluded from the search.
private boolean excludeDataInTheCurrentTransaction = false;
// Sort definitions in priority order (the first added is primary).
private ArrayList<SortDefinition> sortDefinitions = new ArrayList<SortDefinition>(1);
// Default operator ("and"/"or") for query components that do not state one explicitly.
private Operator defaultOperator = Operator.OR;
// Locales for multi-lingual text searches; empty means the user's locale is used (see addLocale).
private ArrayList<Locale> locales = new ArrayList<Locale>();
private MLAnalysisMode mlAnalaysisMode = null; // Pick up from config if null
// How (if at all) the result set is limited.
private LimitBy limitBy = LimitBy.UNLIMITED;
// When permission evaluation happens on results.
private PermissionEvaluationMode permissionEvaluation = PermissionEvaluationMode.EAGER;
// The limiting value used when limiting is in effect.
private int limit = DEFAULT_LIMIT;
/**
 * Construct a search parameters object with the default settings.
 */
public SearchParameters()
{
    // Nothing to initialise beyond the superclass.
    super();
}
/**
 * Set the store to be searched - currently there can be only one. Searching across multiple stores is on the
 * todo list.
 *
 * @param store the store to search
 * @throws IllegalStateException if a store has already been set
 */
public void addStore(StoreRef store)
{
    // Only a single store is supported at present; reject any second store.
    // (The original chunk contained this condition twice - a merge artifact
    // producing a redundant nested if - collapsed to a single check here.)
    if (stores.size() != 0)
    {
        throw new IllegalStateException("At the moment, there can only be one store set for the search");
    }
    stores.add(store);
}
/**
* Add paths for attributes in the result set.
*
* Generally this only makes sense for disconnected results sets.
* These atttributes/paths state what must be present in the result set, akin
* to the selection of columns is sql.
* Add paths for attributes in the result set. Generally this only makes sense for disconnected results sets. These
* atttributes/paths state what must be present in the result set, akin to the selection of columns is sql.
*
* @param attributePath
*/
public void addAttrbutePath(Path attributePath)
public void addAttrbutePath(Path attributePath)
{
attributePaths.add(attributePath);
}
/**
* Add parameter definitions for the query - used to parameterise the query string
*
@@ -112,17 +138,13 @@ public class SearchParameters extends SearchStatement
{
queryParameterDefinitions.add(queryParameterDefinition);
}
/**
 * If true, any data in the current transaction will be ignored in the search. You will not see anything you have
 * added in the current transaction. By default you will see data in the current transaction. This effectively gives
 * read committed isolation. There is a performance overhead for this, at least when using lucene. This flag may be
 * set to avoid that performance hit if you know you do not want to find results that are yet to be committed (this
 * includes creations, deletions and updates)
 *
 * @param excludeDataInTheCurrentTransaction
 */
@@ -130,73 +152,32 @@ public class SearchParameters extends SearchStatement
{
this.excludeDataInTheCurrentTransaction = excludeDataInTheCurrentTransaction;
}
/**
 * Add a sort to the query (for those query languages that do not support it directly). The first sort added is
 * treated as primary, the second as secondary, etc. A helper method to create SortDefinitions.
 *
 * @param field -
 *            this is initially a direct attribute on a node, not an attribute on the parent etc.
 *            TODO: It could be a relative path at some time.
 * @param ascending -
 *            true to sort ascending, false for descending.
 */
public void addSort(String field, boolean ascending)
{
    // The original chunk contained this call twice (a merge artifact), which would have
    // registered the same sort definition twice; the sort is added exactly once here.
    addSort(new SortDefinition(SortDefinition.SortType.FIELD, field, ascending));
}
/**
 * Register a sort definition with the query. Use the static SORT_IN_* members for sorting
 * in score and index order.
 *
 * @param sortDefinition -
 *            the sort definition to append to the current sort chain.
 */
public void addSort(SortDefinition sortDefinition)
{
    this.sortDefinitions.add(sortDefinition);
}
/**
 * A helper class for sort definition.
 * Encapsulated using the lucene sortType, field name and a flag for ascending/descending.
 *
 * @author Andy Hind
 */
public static class SortDefinition
{
// How the sort is applied: by a named field, by document (index) order, or by relevance score.
public enum SortType {FIELD, DOCUMENT, SCORE};
// The kind of sort to perform.
SortType sortType;
// The field to sort on; null for DOCUMENT and SCORE sorts.
String field;
// true for ascending order, false for descending.
boolean ascending;
// Package-private: instances are created via the addSort helpers or the SORT_IN_* constants.
SortDefinition(SortType sortType, String field, boolean ascending)
{
this.sortType = sortType;
this.field = field;
this.ascending = ascending;
}
/** @return true if this sort is ascending, false for descending. */
public boolean isAscending()
{
return ascending;
}
/** @return the field to sort on, or null for document/score sorts. */
public String getField()
{
return field;
}
/** @return the kind of sort to perform. */
public SortType getSortType()
{
return sortType;
}
}
/**
* Get the list of attribute paths that are guarenteed to be in the result set.
@@ -208,7 +189,7 @@ public class SearchParameters extends SearchStatement
return attributePaths;
}
/**
/**
* Is data in the current transaction excluded from the search.
*
* @return
@@ -218,7 +199,7 @@ public class SearchParameters extends SearchStatement
return excludeDataInTheCurrentTransaction;
}
/**
/**
* Get the query parameters that apply to this query.
*
* @return
@@ -247,7 +228,7 @@ public class SearchParameters extends SearchStatement
{
return stores;
}
/**
* Set the default operator for query elements when they are not explicit in the query.
*
@@ -257,7 +238,7 @@ public class SearchParameters extends SearchStatement
{
this.defaultOperator = defaultOperator;
}
/**
* Get the default operator for query elements when they are not explicit in the query.
*
@@ -267,14 +248,8 @@ public class SearchParameters extends SearchStatement
{
return defaultOperator;
}
// NOTE(review): duplicate declarations of limitBy, permissionEvaluation and limit removed here -
// these three fields are already declared once near the top of the class, and a second
// declaration of the same field name is a compile error in Java.
/**
/**
* Get how the result set should be limited
*
* @return
@@ -314,15 +289,110 @@ public class SearchParameters extends SearchStatement
this.permissionEvaluation = permissionEvaluation;
}
/**
 * If limiting the result set in some way, get the limiting value used.
 *
 * @return the maximum number of results when limiting is in effect.
 */
public int getLimit()
{
    return this.limit;
}
/**
 * If limiting the result set in some way, set the limiting value used.
 *
 * @param limit the maximum number of results to apply when limiting is in effect.
 */
public void setLimit(int limit)
{
    this.limit = limit;
}
/**
 * The way in which multilingual fields are treated during a search.
 * By default, only the specified locale is used and it must be an exact match.
 *
 * <p>NOTE(review): the name misspelling ("Analaysis") is kept for API compatibility.
 *
 * @return the multilingual analysis mode, or null when the configured default applies.
 */
public MLAnalysisMode getMlAnalaysisMode()
{
    return this.mlAnalaysisMode;
}
/**
 * Set the way in which multilingual fields are treated during a search.
 * This controls the locales in which multilingual fields will match.
 *
 * @param mlAnalaysisMode the analysis mode to use; null means pick up the configured default.
 */
public void setMlAnalaysisMode(MLAnalysisMode mlAnalaysisMode)
{
    this.mlAnalaysisMode = mlAnalaysisMode;
}
/**
 * Add a locale to include for multi-lingual text searches.
 * If none are set, the default is to use the user's locale.
 *
 * @param locale the locale to include in multilingual matching.
 */
public void addLocale(Locale locale)
{
    this.locales.add(locale);
}
/**
 * Get the locales used for multi-lingual text searches.
 *
 * @return an unmodifiable view of the configured locales.
 */
public List<Locale> getLocales()
{
    final List<Locale> readOnlyView = Collections.unmodifiableList(locales);
    return readOnlyView;
}
/**
 * A helper class for sort definition. Encapsulated using the lucene sortType, field name and a flag for
 * ascending/descending.
 *
 * @author Andy Hind
 */
public static class SortDefinition
{
// How the sort is applied: by a named field, by document (index) order, or by relevance score.
public enum SortType
{
FIELD, DOCUMENT, SCORE
};
// The kind of sort to perform.
SortType sortType;
// The field to sort on; null for DOCUMENT and SCORE sorts.
String field;
// true for ascending order, false for descending.
boolean ascending;
// Package-private: instances are created via the addSort helpers or the SORT_IN_* constants.
SortDefinition(SortType sortType, String field, boolean ascending)
{
this.sortType = sortType;
this.field = field;
this.ascending = ascending;
}
/** @return true if this sort is ascending, false for descending. */
public boolean isAscending()
{
return ascending;
}
/** @return the field to sort on, or null for document/score sorts. */
public String getField()
{
return field;
}
/** @return the kind of sort to perform. */
public SortType getSortType()
{
return sortType;
}
}
}