Next stage of multilingual searches

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4609 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2006-12-14 16:09:43 +00:00
parent fee38b6eb3
commit 9a5df4b303
27 changed files with 1365 additions and 554 deletions

View File

@@ -13,30 +13,144 @@ public enum MLAnalysisMode
/**
* Only exact locale is used.
*/
LOCALE_ONLY,
LOCALE_ONLY
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
},
/**
* Only the exact locale and no locale === all languages
*/
LOCALE_AND_ALL,
LOCALE_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
},
/**
* Expand the locale to include all the locales that contain it.
* en_GB would be en_GB, en, but not all languages
*/
LOCALE_AND_ALL_CONTAINING_LOCALES,
LOCALE_AND_ALL_CONTAINING_LOCALES
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return true;
}
public boolean includesExact()
{
return true;
}
},
/**
* Expand the locale to include all the locales that contain it.
* en_GB would be en_GB, en, and all.
*/
LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL,
LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return true;
}
public boolean includesExact()
{
return true;
}
},
/**
* Expand to all the locales that are contained by this.
* en would expand to en, en_GB, en_US, ....
*/
LOCAL_AND_ALL_CONTAINED_LOCALES;
LOCALE_AND_ALL_CONTAINED_LOCALES
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return true;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
},
/**
* No prefix only
*/
ALL_ONLY
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
};
public static MLAnalysisMode getMLAnalysisMode(String mode)
{
@@ -49,4 +163,13 @@ public enum MLAnalysisMode
}
throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode);
}
public abstract boolean includesAll();
public abstract boolean includesContained();
public abstract boolean includesContaining();
public abstract boolean includesExact();
}

View File

@@ -17,7 +17,8 @@ package org.alfresco.repo.search.impl.lucene;
* limitations under the License.
*/
import java.io.*;
import java.io.IOException;
import java.io.Reader;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the

View File

@@ -21,21 +21,23 @@ import java.util.HashMap;
import java.util.Map;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser;
import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.namespace.QName;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
/**
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that
* require no tokenisation. (tokenise should be set to false when adding the field to the document)
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser
* should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false
* when adding the field to the document)
*
* @author andyh
*/
@@ -82,7 +84,7 @@ public class LuceneAnalyser extends Analyzer
// Treat multilingual as a special case.
// If multilingual then we need to find the correct tokeniser.
// This is done dynamically by reading a language code at the start of the reader.
if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype"))
if (fieldName.startsWith("@"))
{
QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
@@ -90,7 +92,7 @@ public class LuceneAnalyser extends Analyzer
{
if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))
{
MLAnalayser analyser = new MLAnalayser(dictionaryService);
MLAnalayser analyser = new MLAnalayser(dictionaryService, mlAlaysisMode);
return analyser.tokenStream(fieldName, reader);
}
}
@@ -133,17 +135,20 @@ public class LuceneAnalyser extends Analyzer
{
analyser = new WhitespaceAnalyzer();
}
else if (fieldName.equals("TEXT"))
{
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
analyser = loadAnalyzer(dataType);
}
else if (fieldName.startsWith("@"))
{
if (fieldName.endsWith(".mimetype"))
{
analyser = new VerbatimAnalyser();
}
else if (fieldName.endsWith(".size"))
{
analyser = new LongAnalyser();
}
else if (fieldName.endsWith(".locale"))
{
analyser = new VerbatimAnalyser(true);
}
else
{
QName propertyQName = QName.createQName(fieldName.substring(1));
@@ -153,7 +158,18 @@ public class LuceneAnalyser extends Analyzer
if (propertyDef.isTokenisedInIndex())
{
DataTypeDefinition dataType = propertyDef.getDataType();
analyser = loadAnalyzer(dataType);
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else
{
analyser = loadAnalyzer(dataType);
}
}
else
{
@@ -208,7 +224,8 @@ public class LuceneAnalyser extends Analyzer
}
/**
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc.
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different
* languages etc.
*/
@Override
public int getPositionIncrementGap(String fieldName)

View File

@@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Set;
import org.alfresco.repo.search.IndexerException;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo;
import org.alfresco.repo.search.impl.lucene.index.TransactionStatus;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo.LockWork;
@@ -32,7 +31,6 @@ import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
/**
* Common support for abstracting the lucene indexer from its configuration and management requirements.

View File

@@ -40,7 +40,6 @@ import org.alfresco.service.cmr.search.ResultSetRow;
import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.ISO9075;
import org.bouncycastle.crypto.paddings.ISO7816d4Padding;
public class LuceneCategoryServiceImpl implements CategoryService
{

View File

@@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private String lockDirectory;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_ONLY;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL;
/**
* Private constructor for the singleton TODO: Fit in with IoC

View File

@@ -19,7 +19,9 @@ package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
@@ -85,7 +87,6 @@ import org.apache.lucene.search.BooleanClause.Occur;
* The implementation of the lucene based indexer. Supports basic transactional behaviour if used on its own.
*
* @author andyh
*
*/
public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
{
@@ -119,9 +120,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
private long maxAtomicTransformationTime = 20;
/**
* A list of all deletions we have made - at merge these deletions need to be made against the main index.
*
* TODO: Consider if this information needs to be persisted for recovery
* A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO:
* Consider if this information needs to be persisted for recovery
*/
private Set<NodeRef> deletions = new LinkedHashSet<NodeRef>();
@@ -140,8 +140,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
private boolean isModified = false;
/**
* Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non atomically indexed things from one or more other
* updates.
* Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just
* fixing up non atomically indexed things from one or more other updates.
*/
private Boolean isFTSUpdate = null;
@@ -168,7 +168,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
/**
* Default construction
*
*/
LuceneIndexerImpl2()
{
@@ -216,7 +215,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
/**
* Utility method to check we are in the correct state to do work Also keeps track of the dirty flag.
*
*/
private void checkAbleToDoWork(boolean isFTS, boolean isModified)
@@ -508,7 +506,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
case Status.STATUS_ACTIVE:
// special case - commit from active
prepare();
// drop through to do the commit;
// drop through to do the commit;
default:
if (status != Status.STATUS_PREPARED)
{
@@ -585,37 +583,36 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
for (Helper helper : toFTSIndex)
{
//BooleanQuery query = new BooleanQuery();
//query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false);
//query.add(new TermQuery(new Term("TX", helper.tx)), true, false);
//query.add(new TermQuery(new Term("ISNODE", "T")), false, false);
// BooleanQuery query = new BooleanQuery();
// query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false);
// query.add(new TermQuery(new Term("TX", helper.tx)), true, false);
// query.add(new TermQuery(new Term("ISNODE", "T")), false, false);
deletions.add(helper.nodeRef);
// try
// {
// Hits hits = mainSearcher.search(query);
// if (hits.length() > 0)
// {
// for (int i = 0; i < hits.length(); i++)
// {
// mainReader.delete(hits.id(i));
// }
// }
// else
// {
// hits = deltaSearcher.search(query);
// for (int i = 0; i < hits.length(); i++)
// {
// deltaReader.delete(hits.id(i));
// }
// }
// }
// catch (IOException e)
// {
// throw new LuceneIndexException("Failed to delete an FTS update from the original index", e);
// }
// try
// {
// Hits hits = mainSearcher.search(query);
// if (hits.length() > 0)
// {
// for (int i = 0; i < hits.length(); i++)
// {
// mainReader.delete(hits.id(i));
// }
// }
// else
// {
// hits = deltaSearcher.search(query);
// for (int i = 0; i < hits.length(); i++)
// {
// deltaReader.delete(hits.id(i));
// }
// }
// }
// catch (IOException e)
// {
// throw new LuceneIndexException("Failed to delete an FTS update from the original index", e);
// }
}
}
@@ -688,11 +685,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
}
/**
* Prepare to commit
*
* At the moment this makes sure we have all the locks
*
* TODO: This is not doing proper serialisation against the index as would a data base transaction.
* Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper
* serialisation against the index as would a data base transaction.
*
* @return
*/
@@ -766,7 +760,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
/**
* Roll back the index changes (this just means they are never added)
*
*/
public void rollback() throws LuceneIndexException
@@ -781,7 +774,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
case Status.STATUS_ROLLEDBACK:
throw new IndexerException("Unable to roll back: Transaction is already rolled back");
case Status.STATUS_COMMITTING:
// Can roll back during commit
// Can roll back during commit
default:
status = Status.STATUS_ROLLING_BACK;
// if (isModified())
@@ -807,8 +800,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
}
/**
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back.
*
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow
* roll back.
*/
public void setRollbackOnly()
@@ -1242,7 +1235,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
Document xdoc = new Document();
xdoc.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("TX", nodeStatus.getChangeTxnId(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("TX", nodeStatus.getChangeTxnId(), Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
boolean isAtomic = true;
for (QName propertyName : properties.keySet())
{
@@ -1298,8 +1292,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
qNameBuffer.append(";/");
}
qNameBuffer.append(ISO9075.getXPathName(qNameRef.getQName()));
xdoc.add(new Field("PARENT", qNameRef.getParentRef().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ASSOCTYPEQNAME", ISO9075.getXPathName(qNameRef.getTypeQName()), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("PARENT", qNameRef.getParentRef().toString(), Field.Store.YES,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ASSOCTYPEQNAME", ISO9075.getXPathName(qNameRef.getTypeQName()),
Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("LINKASPECT", (pair.getSecond() == null) ? "" : ISO9075.getXPathName(pair
.getSecond()), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
}
@@ -1322,17 +1318,22 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
if (directPaths.contains(pair.getFirst()))
{
Document directoryEntry = new Document();
directoryEntry.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("PATH", pathString, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("ID", nodeRef.toString(), Field.Store.YES,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("PATH", pathString, Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
for (NodeRef parent : getParents(pair.getFirst()))
{
directoryEntry.add(new Field("ANCESTOR", parent.toString(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("ANCESTOR", parent.toString(), Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
}
directoryEntry.add(new Field("ISCONTAINER", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("ISCONTAINER", "T", Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
if (isCategory(getDictionaryService().getType(nodeService.getType(nodeRef))))
{
directoryEntry.add(new Field("ISCATEGORY", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("ISCATEGORY", "T", Field.Store.YES,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
}
docs.add(directoryEntry);
@@ -1350,7 +1351,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
xdoc.add(new Field("PATH", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("QNAME", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ISROOT", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(ContentModel.ASSOC_CHILDREN), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(ContentModel.ASSOC_CHILDREN),
Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
docs.add(xdoc);
@@ -1358,36 +1360,45 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
else
// not a root node
{
xdoc.add(new Field("QNAME", qNameBuffer.toString(),Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("QNAME", qNameBuffer.toString(), Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
// xdoc.add(new Field("PARENT", parentBuffer.toString(), true, true,
// true));
ChildAssociationRef primary = nodeService.getPrimaryParent(nodeRef);
xdoc.add(new Field("PRIMARYPARENT", primary.getParentRef().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(primary.getTypeQName()), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYPARENT", primary.getParentRef().toString(), Field.Store.YES,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(primary.getTypeQName()), Field.Store.YES,
Field.Index.NO, Field.TermVector.NO));
QName typeQName = nodeService.getType(nodeRef);
xdoc.add(new Field("TYPE", ISO9075.getXPathName(typeQName), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("TYPE", ISO9075.getXPathName(typeQName), Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
for (QName classRef : nodeService.getAspects(nodeRef))
{
xdoc.add(new Field("ASPECT", ISO9075.getXPathName(classRef), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ASPECT", ISO9075.getXPathName(classRef), Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
}
xdoc.add(new Field("ISROOT", "F", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
if (isAtomic || indexAllProperties)
{
xdoc.add(new Field("FTSSTATUS", "Clean", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc
.add(new Field("FTSSTATUS", "Clean", Field.Store.NO, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
}
else
{
if (isNew)
{
xdoc.add(new Field("FTSSTATUS", "New", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("FTSSTATUS", "New", Field.Store.NO, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
}
else
{
xdoc.add(new Field("FTSSTATUS", "Dirty", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("FTSSTATUS", "Dirty", Field.Store.NO, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
}
}
@@ -1446,6 +1457,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
boolean atomic = true;
boolean isContent = false;
boolean isMultiLingual = false;
boolean isText = false;
PropertyDefinition propertyDef = getDictionaryService().getProperty(propertyName);
if (propertyDef != null)
@@ -1456,6 +1468,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
atomic = propertyDef.isIndexedAtomically();
isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT);
isMultiLingual = propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT);
isText = propertyDef.getDataType().getName().equals(DataTypeDefinition.TEXT);
}
if (value == null)
{
@@ -1493,7 +1506,19 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
continue;
}
// store mimetype in index - even if content does not index it is useful
doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
// Added size and locale - size needs to be tokenised correctly
doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName + ".size", Long.toString(contentData.getSize()), Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
// TODO: Use the node locale in preference to the system locale
Locale locale = contentData.getLocale();
if (locale == null)
{
locale = Locale.getDefault();
}
doc.add(new Field(attributeName + ".locale", locale.toString().toLowerCase(), Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
ContentReader reader = contentService.getReader(nodeRef, propertyName);
if (reader != null && reader.exists())
@@ -1519,8 +1544,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
// don't index from the reader
readerReady = false;
// not indexed: no transformation
//doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
// doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO,
// Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
}
else if (indexAtomicPropertiesOnly
&& transformer.getTransformationTime() > maxAtomicTransformationTime)
@@ -1554,8 +1581,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
readerReady = false;
// not indexed: transformation
// failed
//doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
// doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO,
// Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
}
}
}
@@ -1564,16 +1593,16 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
if (readerReady)
{
InputStreamReader isr = null;
//InputStream ris = reader.getContentInputStream();
//try
//{
// isr = new InputStreamReader(ris, "UTF-8");
// InputStream ris = reader.getContentInputStream();
// try
// {
// isr = new InputStreamReader(ris, "UTF-8");
// }
//catch (UnsupportedEncodingException e)
// {
// isr = new InputStreamReader(ris);
//}
//doc.add(new Field("TEXT", isr, Field.TermVector.NO));
// catch (UnsupportedEncodingException e)
// {
// isr = new InputStreamReader(ris);
// }
// doc.add(new Field("TEXT", isr, Field.TermVector.NO));
InputStream ris = reader.getReader().getContentInputStream();
try
@@ -1584,10 +1613,11 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
{
isr = new InputStreamReader(ris);
}
doc.add(new Field("@"
+ QName.createQName(propertyName.getNamespaceURI(), ISO9075.encode(propertyName
.getLocalName())), isr, Field.TermVector.NO));
StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000");
StringReader prefix = new StringReader(builder.toString());
Reader multiReader = new MultiReader(prefix, isr);
doc.add(new Field(attributeName, multiReader, Field.TermVector.NO));
}
}
else
@@ -1601,17 +1631,19 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
+ (reader == null ? " --- " : Boolean.toString(reader.exists())));
}
// not indexed: content missing
doc.add(new Field("TEXT", NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field("TEXT", NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED,
Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
}
}
else
{
Field.Store fieldStore = store ? Field.Store.YES : Field.Store.NO;
Field.Index fieldIndex;
if(index )
if (index)
{
if(tokenise)
if (tokenise)
{
fieldIndex = Field.Index.TOKENIZED;
}
@@ -1625,15 +1657,27 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
fieldIndex = Field.Index.NO;
}
if(isMultiLingual)
if (isMultiLingual)
{
MLText mlText = DefaultTypeConverter.INSTANCE.convert(MLText.class, value);
for(Locale locale : mlText.getLocales())
for (Locale locale : mlText.getLocales())
{
String localeString = mlText.getValue(locale);
doc.add(new Field(attributeName, "\u0000" + locale.toString() +"\u0000" + localeString, fieldStore, fieldIndex, Field.TermVector.NO));
StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
Field.TermVector.NO));
}
}
else if(isText)
{
// TODO: Use the node locale in preference to the system locale
Locale locale = Locale.getDefault();
StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
Field.TermVector.NO));
}
else
{
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
@@ -2000,19 +2044,14 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
public boolean getDeleteOnlyNodes()
{
if(isFTSUpdate != null)
{
return isFTSUpdate.booleanValue();
}
else
{
return false;
}
if (isFTSUpdate != null)
{
return isFTSUpdate.booleanValue();
}
else
{
return false;
}
}
}

View File

@@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.SearcherException;
@@ -32,6 +33,7 @@ import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.alfresco.service.namespace.QName;
import org.apache.log4j.Logger;
@@ -55,7 +57,7 @@ public class LuceneQueryParser extends QueryParser
private DictionaryService dictionaryService;
private List<Locale> locales;
private SearchParameters searchParameters;
/**
* Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -71,7 +73,7 @@ public class LuceneQueryParser extends QueryParser
*/
static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
Operator defaultOperator, List<Locale> locales) throws ParseException
Operator defaultOperator, SearchParameters searchParameters) throws ParseException
{
if (s_logger.isDebugEnabled())
{
@@ -81,14 +83,19 @@ public class LuceneQueryParser extends QueryParser
parser.setDefaultOperator(defaultOperator);
parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService);
parser.setLocales(locales);
parser.setSearchParameters(searchParameters);
// TODO: Apply locale constraints at the top level if required for the non ML doc types.
return parser.parse(query);
Query result = parser.parse(query);
if (s_logger.isDebugEnabled())
{
s_logger.debug("Query " + query + " is\n\t" + result.toString());
}
return result;
}
private void setLocales(List<Locale> locales)
private void setSearchParameters(SearchParameters searchParameters)
{
this.locales = locales;
this.searchParameters = searchParameters;
}
public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver)
@@ -141,15 +148,31 @@ public class LuceneQueryParser extends QueryParser
}
else if (field.equals("TEXT"))
{
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
Set<String> text = searchParameters.getTextAttributes();
if ((text == null) || (text.size() == 0))
{
// The super implementation will create phrase queries etc if required
Query part = super.getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD);
Collection<QName> contentAttributes = dictionaryService
.getAllProperties(DataTypeDefinition.CONTENT);
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD);
}
return query;
}
return query;
else
{
BooleanQuery query = new BooleanQuery();
for (String fieldName : text)
{
Query part = getFieldQuery(fieldName, queryText);
query.add(part, Occur.SHOULD);
}
return query;
}
}
else if (field.equals("ID"))
{
@@ -232,6 +255,39 @@ public class LuceneQueryParser extends QueryParser
}
return booleanQuery;
}
else if (field.equals("EXACTTYPE"))
{
TypeDefinition target;
if (queryText.startsWith("{"))
{
target = dictionaryService.getType(QName.createQName(queryText));
}
else
{
int colonPosition = queryText.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver
.getNamespaceURI(""), queryText));
}
else
{
// find the prefix
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver
.getNamespaceURI(queryText.substring(0, colonPosition)), queryText
.substring(colonPosition + 1)));
}
}
if (target == null)
{
throw new SearcherException("Invalid type: " + queryText);
}
QName targetQName = target.getName();
TermQuery termQuery = new TermQuery(new Term("TYPE", targetQName.toString()));
return termQuery;
}
else if (field.equals("ASPECT"))
{
AspectDefinition target;
@@ -281,100 +337,133 @@ public class LuceneQueryParser extends QueryParser
}
return booleanQuery;
}
else if (field.startsWith("@"))
else if (field.equals("EXACTASPECT"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
AspectDefinition target;
if (queryText.startsWith("{"))
{
int colonPosition = field.indexOf(':');
target = dictionaryService.getAspect(QName.createQName(queryText));
}
else
{
int colonPosition = queryText.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver
.getNamespaceURI(""), queryText));
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver
.getNamespaceURI(queryText.substring(0, colonPosition)), queryText
.substring(colonPosition + 1)));
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
QName targetQName = target.getName();
TermQuery termQuery = new TermQuery(new Term("ASPECT", targetQName.toString()));
return termQuery;
}
else if (field.startsWith("@"))
{
return attributeQueryBuilder(field, queryText, new FieldQuery());
}
else if (field.equals("ALL"))
{
Set<String> all = searchParameters.getAllAttributes();
if ((all == null) || (all.size() == 0))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1,
expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
Collection<QName> contentAttributes = dictionaryService.getAllProperties(null);
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
return super.getFieldQuery(expandedFieldName, queryText);
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
}
else if (expandedFieldName.endsWith(".size"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1,
expandedFieldName.length() - 5));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFieldQuery(expandedFieldName, queryText);
}
}
else if (expandedFieldName.endsWith(".locale"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1,
expandedFieldName.length() - 7));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFieldQuery(expandedFieldName, queryText);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections
.singletonList(I18NUtil.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = super.getFieldQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
return query;
}
else
{
return super.getFieldQuery(expandedFieldName, queryText);
BooleanQuery query = new BooleanQuery();
for (String fieldName : all)
{
Query part = getFieldQuery(fieldName, queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
}
else if (field.equals("ISNULL"))
{
String qnameString = expandFieldName(queryText);
QName qname = QName.createQName(qnameString);
PropertyDefinition pd = dictionaryService.getProperty(qname);
if (pd != null)
{
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
query.add(presenceQuery, Occur.MUST_NOT);
return query;
}
else
{
return super.getFieldQuery(field, queryText);
}
}
else if (field.equals("ISNOTNULL"))
{
String qnameString = expandFieldName(queryText);
QName qname = QName.createQName(qnameString);
PropertyDefinition pd = dictionaryService.getProperty(qname);
if (pd != null)
{
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
query.add(presenceQuery, Occur.MUST);
return query;
}
else
{
return super.getFieldQuery(field, queryText);
}
}
else if (dictionaryService.getDataType(QName.createQName(expandFieldName(field))) != null)
{
Collection<QName> contentAttributes = dictionaryService.getAllProperties(dictionaryService.getDataType(
QName.createQName(expandFieldName(field))).getName());
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD);
}
return query;
}
else
{
return super.getFieldQuery(field, queryText);
}
}
catch (SAXPathException e)
{
@@ -391,24 +480,7 @@ public class LuceneQueryParser extends QueryParser
{
if (field.startsWith("@"))
{
String fieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
fieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
fieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
String fieldName = expandAttributeFieldName(field);
return new RangeQuery(new Term(fieldName, getToken(fieldName, part1)), new Term(fieldName, getToken(
fieldName, part2)), inclusive);
@@ -420,6 +492,52 @@ public class LuceneQueryParser extends QueryParser
}
/**
 * Expands an attribute field name of the form "@prefix:local" (or "@local",
 * using the default namespace) into the fully qualified "@{uri}local" form.
 * Names already carrying a braced namespace ("@{uri}local") are returned
 * unchanged.
 *
 * This is the attribute counterpart of {@link #expandFieldName(String)}:
 * the leading '@' is stripped, the remainder expanded by the same rules,
 * and the '@' restored — keeping the prefix-expansion logic in one place
 * instead of duplicating it.
 *
 * @param field the attribute field name, expected to start with '@'
 * @return the expanded attribute field name, starting with '@'
 */
private String expandAttributeFieldName(String field)
{
    // Delegate the expansion of everything after the '@' marker
    return "@" + expandFieldName(field.substring(1));
}
/**
 * Expands a prefixed field name (e.g. "cm:name") into its fully qualified
 * "{uri}localname" form using the namespace prefix resolver. A name with no
 * prefix is resolved against the default namespace; a name already in the
 * braced "{uri}localname" form is returned unchanged.
 *
 * @param field the field name to expand
 * @return the fully qualified field name
 */
private String expandFieldName(String field)
{
    // Already fully qualified - nothing to expand
    if (field.charAt(0) == '{')
    {
        return field;
    }
    int separator = field.indexOf(':');
    if (separator == -1)
    {
        // No prefix present - resolve against the default namespace
        return "{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field;
    }
    // Resolve the prefix before the colon and append the local name
    String uri = namespacePrefixResolver.getNamespaceURI(field.substring(0, separator));
    return "{" + uri + "}" + field.substring(separator + 1);
}
private String getToken(String field, String value)
{
TokenStream source = analyzer.tokenStream(field, new StringReader(value));
@@ -457,67 +575,8 @@ public class LuceneQueryParser extends QueryParser
{
if (field.startsWith("@"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getPrefixQuery(expandedFieldName, termStr);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getPrefixQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getPrefixQuery(expandedFieldName, termStr);
}
return attributeQueryBuilder(field, termStr, new PrefixQuery());
}
else if (field.equals("TEXT"))
{
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
@@ -525,15 +584,14 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = super.getPrefixQuery("@" + qname.toString(), termStr);
Query part = getPrefixQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD);
}
return query;
}
else
{
return super.getFieldQuery(field, termStr);
return super.getPrefixQuery(field, termStr);
}
}
@@ -542,65 +600,7 @@ public class LuceneQueryParser extends QueryParser
{
if (field.startsWith("@"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getWildcardQuery(expandedFieldName, termStr);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getWildcardQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getWildcardQuery(expandedFieldName, termStr);
}
return attributeQueryBuilder(field, termStr, new WildcardQuery());
}
else if (field.equals("TEXT"))
@@ -610,11 +610,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = super.getWildcardQuery("@" + qname.toString(), termStr);
Query part = getWildcardQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD);
}
return query;
}
else
{
@@ -627,65 +626,7 @@ public class LuceneQueryParser extends QueryParser
{
if (field.startsWith("@"))
{
// Expand prefixes
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getFuzzyQuery(expandedFieldName, builder.toString(), minSimilarity);
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity);
}
return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity));
}
else if (field.equals("TEXT"))
@@ -695,11 +636,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = super.getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
query.add(part, Occur.SHOULD);
}
return query;
}
else
{
@@ -712,4 +652,155 @@ public class LuceneQueryParser extends QueryParser
this.dictionaryService = dictionaryService;
}
/**
 * Exposes the standard Lucene QueryParser field-query construction so that
 * the SubQuery helper classes can reach the superclass behaviour directly.
 *
 * @param field the (already expanded) field name
 * @param queryText the query text
 * @return the query built by the QueryParser superclass
 * @throws ParseException if the query text cannot be parsed
 */
public Query getSuperFieldQuery(String field, String queryText) throws ParseException
{
    return super.getFieldQuery(field, queryText);
}
/**
 * Exposes the standard Lucene QueryParser fuzzy-query construction so that
 * the SubQuery helper classes can reach the superclass behaviour directly.
 *
 * @param field the (already expanded) field name
 * @param termStr the term to match fuzzily
 * @param minSimilarity the minimum similarity for fuzzy matching
 * @return the fuzzy query built by the QueryParser superclass
 * @throws ParseException if the term cannot be parsed
 */
public Query getSuperFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
    return super.getFuzzyQuery(field, termStr, minSimilarity);
}
/**
 * Exposes the standard Lucene QueryParser prefix-query construction so that
 * the SubQuery helper classes can reach the superclass behaviour directly.
 *
 * @param field the (already expanded) field name
 * @param termStr the prefix to match
 * @return the prefix query built by the QueryParser superclass
 * @throws ParseException if the term cannot be parsed
 */
public Query getSuperPrefixQuery(String field, String termStr) throws ParseException
{
    return super.getPrefixQuery(field, termStr);
}
/**
 * Exposes the standard Lucene QueryParser wildcard-query construction so
 * that the SubQuery helper classes can reach the superclass behaviour
 * directly.
 *
 * @param field the (already expanded) field name
 * @param termStr the wildcard pattern to match
 * @return the wildcard query built by the QueryParser superclass
 * @throws ParseException if the term cannot be parsed
 */
public Query getSuperWildcardQuery(String field, String termStr) throws ParseException
{
    return super.getWildcardQuery(field, termStr);
}
/**
 * Strategy used by attributeQueryBuilder to build the underlying Lucene
 * query for a single expanded field name, varying only the kind of query
 * produced (field, fuzzy, prefix or wildcard).
 */
interface SubQuery
{
    /**
     * Builds the query for the given field and query text.
     *
     * @param field the (already expanded) field name
     * @param queryText the query text
     * @return the constructed query
     * @throws ParseException if the query text cannot be parsed
     */
    Query getQuery(String field, String queryText) throws ParseException;
}
/**
 * SubQuery that builds a standard field query via the QueryParser
 * superclass (which creates phrase queries etc. where required).
 */
class FieldQuery implements SubQuery
{
    public Query getQuery(String field, String queryText) throws ParseException
    {
        return getSuperFieldQuery(field, queryText);
    }
}
/**
 * SubQuery that builds a fuzzy query via the QueryParser superclass,
 * capturing the minimum similarity to apply.
 *
 * NOTE(review): this class name may shadow Lucene's own FuzzyQuery if that
 * type is imported elsewhere in this file — confirm before renaming either.
 */
class FuzzyQuery implements SubQuery
{
    // Fixed at construction and never changed, so declared final;
    // private is safe because inner-class private members remain
    // accessible to the enclosing class.
    private final float minSimilarity;

    FuzzyQuery(float minSimilarity)
    {
        this.minSimilarity = minSimilarity;
    }

    public Query getQuery(String field, String termStr) throws ParseException
    {
        return getSuperFuzzyQuery(field, termStr, minSimilarity);
    }
}
/**
 * SubQuery that builds a prefix query via the QueryParser superclass.
 */
class PrefixQuery implements SubQuery
{
    public Query getQuery(String field, String termStr) throws ParseException
    {
        return getSuperPrefixQuery(field, termStr);
    }
}
/**
 * SubQuery that builds a wildcard query via the QueryParser superclass.
 */
class WildcardQuery implements SubQuery
{
    public Query getQuery(String field, String termStr) throws ParseException
    {
        return getSuperWildcardQuery(field, termStr);
    }
}
/**
 * Builds the Lucene query for an attribute field ("@..."), handling the
 * special content sub-fields (.mimetype, .size, .locale), multilingual
 * (MLTEXT) properties, content properties, and plain attributes. The kind
 * of leaf query produced (field/fuzzy/prefix/wildcard) is supplied by the
 * SubQuery strategy so this expansion logic is written once.
 *
 * @param field the attribute field name, starting with '@'
 * @param queryText the query text
 * @param subQueryBuilder strategy that builds the leaf queries
 * @return the constructed query
 * @throws ParseException if the query text cannot be parsed
 */
private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder) throws ParseException
{
    // Expand prefixes ("@cm:name" -> "@{uri}name")
    String expandedFieldName = expandAttributeFieldName(field);
    // Mime type sub-field of a content property: query it directly.
    // NOTE(review): if the suffix matches but the property is not a CONTENT
    // property, control falls through to the generic handling below with the
    // ".mimetype"/".size"/".locale" suffix still attached — confirm intended.
    if (expandedFieldName.endsWith(".mimetype"))
    {
        // Strip the leading '@' and the ".mimetype" suffix (9 chars) to get the property QName
        QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
        PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
        if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
        {
            return subQueryBuilder.getQuery(expandedFieldName, queryText);
        }
    }
    // Size sub-field of a content property
    else if (expandedFieldName.endsWith(".size"))
    {
        // Strip the leading '@' and the ".size" suffix (5 chars)
        QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 5));
        PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
        if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
        {
            return subQueryBuilder.getQuery(expandedFieldName, queryText);
        }
    }
    // Locale sub-field of a content property
    else if (expandedFieldName.endsWith(".locale"))
    {
        // Strip the leading '@' and the ".locale" suffix (7 chars)
        QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 7));
        PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
        if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
        {
            return subQueryBuilder.getQuery(expandedFieldName, queryText);
        }
    }
    // Already in expanded form
    // ML
    QName propertyQName = QName.createQName(expandedFieldName.substring(1));
    PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
    if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
    {
        // Build a sub query for each locale and or the results together - the analysis will take care of
        // cross language matching for each entry
        BooleanQuery booleanQuery = new BooleanQuery();
        List<Locale> locales = searchParameters.getLocales();
        // Fall back to the thread's current locale when none are configured
        for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
                .getLocale()) : locales))
        {
            StringBuilder builder = new StringBuilder(queryText.length() + 10);
            // ML tokens are stored as "\u0000<locale>\u0000<text>"; prepend the locale marker
            builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
            Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
            booleanQuery.add(subQuery, Occur.SHOULD);
        }
        return booleanQuery;
    }
    // Content
    else if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
    {
        // Build a sub query for each locale and or the results together -
        // - add an explicit condition for the locale
        BooleanQuery booleanQuery = new BooleanQuery();
        List<Locale> locales = searchParameters.getLocales();
        for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
                .getLocale()) : locales))
        {
            // Each per-locale clause requires both the content match and a
            // matching ".locale" sub-field
            BooleanQuery subQuery = new BooleanQuery();
            Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
            subQuery.add(contentQuery, Occur.MUST);
            StringBuilder builder = new StringBuilder();
            builder.append(expandedFieldName).append(".locale");
            Query localeQuery = getFieldQuery(builder.toString(), locale.toString());
            subQuery.add(localeQuery, Occur.MUST);
            booleanQuery.add(subQuery, Occur.SHOULD);
        }
        return booleanQuery;
    }
    else
    {
        // Plain attribute (or unknown property): build the query directly
        return subQueryBuilder.getQuery(expandedFieldName, queryText);
    }
}
}

View File

@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
}
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters.getLocales());
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters);
ClosingIndexSearcher searcher = getSearcher(indexer);
if (searcher == null)
{

View File

@@ -42,6 +42,7 @@ import org.alfresco.repo.dictionary.DictionaryNamespaceComponent;
import org.alfresco.repo.dictionary.M2Model;
import org.alfresco.repo.dictionary.NamespaceDAOImpl;
import org.alfresco.repo.node.BaseNodeServiceTest;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.QueryParameterDefImpl;
import org.alfresco.repo.search.QueryRegisterComponent;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
@@ -1896,6 +1897,16 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + testType.toString() + "\"", null,
null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\""
+ testType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testSuperType.toString() + "\"",
null, null);
assertEquals(13, results.length());
@@ -1906,6 +1917,16 @@ public class LuceneTest2 extends TestCase
assertEquals(13, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + testSuperType.toString() + "\"",
null, null);
assertEquals(12, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\""
+ testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(12, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\"" + testAspect.toString() + "\"", null,
null);
assertEquals(1, results.length());
@@ -1926,6 +1947,28 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
// Test for AR-384
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox AND TYPE:\""
+ ContentModel.PROP_CONTENT.toString() + "\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo AND TYPE:\""
+ ContentModel.PROP_CONTENT.toString() + "\"", null, null);
assertEquals(0, results.length());
results.close();
// Test stop words are equivalent
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over a lazy\"", null, null);
assertEquals(1, results.length());
results.close();
// FTS test
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null);
@@ -1943,16 +1986,127 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:\"en_GB\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:en_*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:e*_GB", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"90\"", null, null);
assertEquals(1, results.length());
results.close();
QName queryQName = QName.createQName("alf:test1", namespacePrefixResolver);
results = searcher.query(rootNodeRef.getStoreRef(), queryQName, null);
assertEquals(1, results.length());
results.close();
// Configuration of TEXT
SearchParameters sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("TEXT:\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("TEXT:\"fox\"");
sp.addTextAttribute("@"+ContentModel.PROP_NAME.toString());
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp.addTextAttribute("@"+ContentModel.PROP_CONTENT.toString());
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// ALL and its configuration
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ALL:\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ALL:\"fox\"");
sp.addAllAttribute("@"+ContentModel.PROP_NAME.toString());
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp.addAllAttribute("@"+ContentModel.PROP_CONTENT.toString());
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ALL:\"5.6\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Search by data type
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("d\\:double:\"5.6\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("d\\:content:\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Direct ML tests
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
SearchParameters sp = new SearchParameters();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
@@ -1960,6 +2114,20 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.UK);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.setMlAnalaysisMode(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES);
sp.addLocale(Locale.UK);
results = searcher.query(sp);
assertEquals(1, results.length());
@@ -2073,6 +2241,42 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
// Test ISNULL/ISNOTNULL
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\"");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNOTNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\"");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNOTNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Test non field queries
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox", null, null);
@@ -2092,49 +2296,49 @@ public class LuceneTest2 extends TestCase
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fox", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fo*", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":f*x", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":*ox", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":*ox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fox", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":fox",
null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fo*", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":fo*",
null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":f*x", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":f*x",
null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":*ox", null, null);
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":*ox",
null, null);
assertEquals(1, results.length());
results.close();
// Parameters
queryQName = QName.createQName("alf:test2", namespacePrefixResolver);
@@ -3661,6 +3865,7 @@ public class LuceneTest2 extends TestCase
DynamicNamespacePrefixResolver nspr = new DynamicNamespacePrefixResolver(null);
nspr.registerNamespace(NamespaceService.ALFRESCO_PREFIX, NamespaceService.ALFRESCO_URI);
nspr.registerNamespace(NamespaceService.CONTENT_MODEL_PREFIX, NamespaceService.CONTENT_MODEL_1_0_URI);
nspr.registerNamespace(NamespaceService.DICTIONARY_MODEL_PREFIX, NamespaceService.DICTIONARY_MODEL_1_0_URI);
nspr.registerNamespace("namespace", "namespace");
nspr.registerNamespace("test", TEST_NAMESPACE);
nspr.registerNamespace(NamespaceService.DEFAULT_PREFIX, defaultURI);

View File

@@ -238,6 +238,26 @@
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
<property name="test:null">
<type>d:text</type>
<mandatory>false</mandatory>
<multiple>false</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
<property name="test:path-ista">
<type>d:path</type>
<mandatory>false</mandatory>
<multiple>false</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
</properties>
<mandatory-aspects>

View File

@@ -16,6 +16,8 @@ package org.alfresco.repo.search.impl.lucene;
* limitations under the License.
*/
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -23,8 +25,6 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import java.util.Vector;
/**
* A QueryParser which constructs queries to search multiple fields.
*

View File

@@ -0,0 +1,93 @@
/**
*
*/
package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.Reader;
class MultiReader extends Reader
{
Reader first;
Reader second;
boolean firstActive = true;
MultiReader(Reader first, Reader second)
{
this.first = first;
this.second = second;
}
@Override
public void close() throws IOException
{
IOException ioe = null;
try
{
first.close();
}
catch (IOException e)
{
ioe = e;
}
second.close();
if (ioe != null)
{
throw ioe;
}
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException
{
synchronized (lock)
{
if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0))
{
throw new IndexOutOfBoundsException();
}
else if (len == 0)
{
return 0;
}
for(int i = 0; i < len; i++)
{
int c;
if(firstActive)
{
c = first.read();
if(c == -1)
{
firstActive = false;
c = second.read();
}
}
else
{
c = second.read();
}
if(c == -1)
{
if(i == 0)
{
return -1;
}
else
{
return i;
}
}
else
{
cbuf[off+i] = (char)c;
}
}
return len;
}
}
}

View File

@@ -0,0 +1,85 @@
package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import junit.framework.TestCase;
/**
 * Unit tests for {@link MultiReader}: verifies that two readers are
 * presented as a single continuous character stream.
 */
public class MultiReaderTest extends TestCase
{
    public MultiReaderTest()
    {
        super();
    }

    public MultiReaderTest(String arg0)
    {
        super(arg0);
    }

    /**
     * Reads the concatenated stream one character at a time.
     */
    public void testMultiReader_single() throws IOException
    {
        String first = "my first string";
        String second = "another little string";

        Reader multiReader = new MultiReader(new StringReader(first), new StringReader(second));

        StringBuilder builder = new StringBuilder();
        int c;
        while ((c = multiReader.read()) != -1)
        {
            builder.append((char) c);
        }
        // JUnit convention: expected value first, actual second.
        assertEquals(first + second, builder.toString());
    }

    /**
     * Reads the concatenated stream in chunks of every size from 1 to 99.
     *
     * A fresh reader and accumulator are created per chunk size: the
     * original version reused one exhausted reader and a shared builder,
     * so every iteration after the first read nothing and asserted
     * vacuously against stale data.
     */
    public void testMultiReader_bits() throws IOException
    {
        String first = "my first string";
        String second = "another little string";

        for (int chunk = 1; chunk < 100; chunk++)
        {
            Reader multiReader = new MultiReader(new StringReader(first), new StringReader(second));
            StringBuilder builder = new StringBuilder();
            char[] c = new char[chunk];
            int i = 0;
            while (i != -1)
            {
                i = multiReader.read(c);
                for (int j = 0; j < i; j++)
                {
                    builder.append(c[j]);
                }
            }
            assertEquals(first + second, builder.toString());
        }
    }

    /**
     * Checks that skip() advances the combined stream, including across
     * the boundary between the two readers.
     */
    public void testSkip() throws IOException
    {
        String first = "my first string";
        String second = "another little string";

        Reader multiReader = new MultiReader(new StringReader(first), new StringReader(second));
        String all = first + second;

        multiReader.skip(3);
        assertEquals(all.charAt(3), (char) multiReader.read());
        multiReader.skip(15);
        // One char was consumed by the read above, then 15 more skipped.
        assertEquals(all.charAt(3 + 1 + 15), (char) multiReader.read());
    }
}

View File

@@ -1,14 +1,31 @@
/* Generated By:JavaCC: Do not edit this line. QueryParser.java */
package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
/**

View File

@@ -1,14 +1,5 @@
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.alfresco.repo.search.impl.lucene;
import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
public class QueryParserTokenManager implements QueryParserConstants
{

View File

@@ -18,7 +18,6 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.TokenFilter;

View File

@@ -22,7 +22,6 @@ import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Simple tokeniser for floats.

View File

@@ -22,7 +22,6 @@ import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Simple tokeniser for integers.

View File

@@ -19,11 +19,9 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
import org.alfresco.error.AlfrescoRuntimeException;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Simple tokeniser for longs.

View File

@@ -0,0 +1,9 @@
package org.alfresco.repo.search.impl.lucene.analysis;
/**
 * A {@link VerbatimAnalyser} variant that folds the verbatim token to
 * lower case.
 *
 * NOTE(review): relies on the VerbatimAnalyser(boolean) constructor passing
 * the flag through to its token filter -- confirm against VerbatimAnalyser.
 */
public class LowerCaseVerbatimAnalyser extends VerbatimAnalyser
{
// Delegate to the case-folding mode of the parent analyser.
public LowerCaseVerbatimAnalyser()
{
super(true);
}
}

View File

@@ -7,7 +7,7 @@ import java.util.HashMap;
import java.util.Locale;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.impl.lucene.LuceneQueryParser;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.apache.log4j.Logger;
@@ -22,9 +22,12 @@ public class MLAnalayser extends Analyzer
private HashMap<Locale, Analyzer> analysers = new HashMap<Locale, Analyzer>();
public MLAnalayser(DictionaryService dictionaryService)
private MLAnalysisMode mlAnalaysisMode;
public MLAnalayser(DictionaryService dictionaryService, MLAnalysisMode mlAnalaysisMode)
{
this.dictionaryService = dictionaryService;
this.mlAnalaysisMode = mlAnalaysisMode;
}
@Override
@@ -107,7 +110,7 @@ public class MLAnalayser extends Analyzer
}
Locale locale = new Locale(language, country, varient);
// leave the reader where it is ....
return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader);
return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode);
}
else
{

View File

@@ -3,39 +3,36 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import org.alfresco.repo.search.MLAnalysisMode;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
/**
* Create duplicate tokens for multilingual varients
*
* The forms are
*
* Tokens:
* Token - all languages
* {fr}Token - if a language is specified
* {fr_CA}Token - if a language and country is specified
* {fr_CA_Varient}Token - for all three
* Create duplicate tokens for multilingual varients The forms are Tokens: Token - all languages {fr}Token - if a
* language is specified {fr_CA}Token - if a language and country is specified {fr_CA_Varient}Token - for all three
* {fr__Varient}Token - for a language varient with no country
*
* @author andyh
*
*/
public class MLTokenDuplicator extends Tokenizer
{
private static Logger s_logger = Logger.getLogger(MLTokenDuplicator.class);
TokenStream source;
Locale locale;
Iterator<Token> it;
ArrayList<String> prefixes;
HashSet<String> prefixes;
public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader)
public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader, MLAnalysisMode mlAnalaysisMode)
{
super(reader);
this.source = source;
@@ -45,27 +42,92 @@ public class MLTokenDuplicator extends Tokenizer
boolean c = locale.getCountry().length() != 0;
boolean v = locale.getVariant().length() != 0;
prefixes = new ArrayList<String>(4);
prefixes.add("");
prefixes = new HashSet<String>(4);
if (mlAnalaysisMode.includesAll())
{
prefixes.add("");
}
if (l)
if (mlAnalaysisMode.includesExact())
{
StringBuffer result = new StringBuffer();
result.append("{").append(locale.getLanguage()).append("}");
result.append("{").append(locale.toString()).append("}");
prefixes.add(result.toString());
result.deleteCharAt(result.length()-1);
}
if (c || (l && v))
if (mlAnalaysisMode.includesContaining())
{
if (v)
{
result.append('_').append(locale.getCountry()).append("}");
Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), "");
StringBuffer result = new StringBuffer();
result.append("{").append(noVarient.toString()).append("}");
prefixes.add(result.toString());
result.deleteCharAt(result.length()-1);
}
if (v && (l || c))
{
result.append('_').append(locale.getVariant()).append("}");
Locale noCountry = new Locale(locale.getLanguage(), "", "");
result = new StringBuffer();
result.append("{").append(noCountry.toString()).append("}");
prefixes.add(result.toString());
}
if (c)
{
Locale noCountry = new Locale(locale.getLanguage(), "", "");
StringBuffer result = new StringBuffer();
result.append("{").append(noCountry.toString()).append("}");
prefixes.add(result.toString());
}
}
if (mlAnalaysisMode.includesContained())
{
// varients have not contained
if (!v)
{
if (!c)
{
if (!l)
{
// All
for (Locale toAdd : Locale.getAvailableLocales())
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
else
{
// All that match language
for (Locale toAdd : Locale.getAvailableLocales())
{
if (locale.getLanguage().equals(toAdd.getLanguage()))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
}
else
{
// All that match language and country
for (Locale toAdd : Locale.getAvailableLocales())
{
if ((locale.getLanguage().equals(toAdd.getLanguage()))
&& (locale.getCountry().equals(toAdd.getCountry())))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
}
}
if(s_logger.isDebugEnabled())
{
s_logger.debug("Locale "+ locale +" using "+mlAnalaysisMode+" is "+prefixes);
}
}
@@ -81,7 +143,7 @@ public class MLTokenDuplicator extends Tokenizer
{
return null;
}
if(it.hasNext())
if (it.hasNext())
{
return it.next();
}
@@ -101,10 +163,10 @@ public class MLTokenDuplicator extends Tokenizer
}
ArrayList<Token> tokens = new ArrayList<Token>(prefixes.size());
for(String prefix : prefixes)
for (String prefix : prefixes)
{
Token newToken = new Token(prefix+token.termText(), token.startOffset(), token.endOffset(), token.type());
if(tokens.size() == 0)
Token newToken = new Token(prefix + token.termText(), token.startOffset(), token.endOffset(), token.type());
if (tokens.size() == 0)
{
newToken.setPositionIncrement(token.getPositionIncrement());
}
@@ -118,5 +180,4 @@ public class MLTokenDuplicator extends Tokenizer
}
}

View File

@@ -19,11 +19,11 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException;
import java.io.StringReader;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import junit.framework.TestCase;
public class PathTokenFilterTest extends TestCase
{

View File

@@ -5,18 +5,23 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
public class VerbatimAnalyser
extends Analyzer
public class VerbatimAnalyser extends Analyzer
{
boolean lowerCase;
public VerbatimAnalyser()
{
lowerCase = false;
}
public VerbatimAnalyser(boolean lowerCase)
{
super();
this.lowerCase = lowerCase;
}
public TokenStream tokenStream(String fieldName, Reader reader)
{
return new VerbatimTokenFilter(reader);
return new VerbatimTokenFilter(reader, lowerCase);
}
}

View File

@@ -10,9 +10,12 @@ public class VerbatimTokenFilter extends Tokenizer
{
boolean readInput = true;
VerbatimTokenFilter(Reader in)
boolean lowerCase;
VerbatimTokenFilter(Reader in, boolean lowerCase)
{
super(in);
this.lowerCase = lowerCase;
}
@Override
@@ -31,6 +34,10 @@ public class VerbatimTokenFilter extends Tokenizer
}
String token = buffer.toString();
if(lowerCase)
{
token = token.toLowerCase();
}
return new Token(token, 0, token.length() - 1, "VERBATIM");
}
else

View File

@@ -18,8 +18,10 @@ package org.alfresco.service.cmr.search;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.service.cmr.repository.Path;
@@ -95,6 +97,10 @@ public class SearchParameters extends SearchStatement
private int limit = DEFAULT_LIMIT;
private HashSet<String> allAttributes = new HashSet<String>();
private HashSet<String> textAttributes = new HashSet<String>();
/**
* Default constructor
*/
@@ -352,6 +358,52 @@ public class SearchParameters extends SearchStatement
return Collections.unmodifiableList(locales);
}
/**
 * Add an attribute name to the set of text attributes for this search.
 *
 * NOTE(review): the set is only stored here and exposed via
 * getTextAttributes(); presumably the query builder uses it to pick fields
 * for multi-lingual text handling -- confirm against the Lucene searcher.
 *
 * @param attribute the attribute (field) name to add
 */
public void addTextAttribute(String attribute)
{
textAttributes.add(attribute);
}
/**
 * Get the attribute names added via {@link #addTextAttribute(String)}.
 *
 * @return an unmodifiable view of the text attribute names
 */
public Set<String> getTextAttributes()
{
return Collections.unmodifiableSet(textAttributes);
}
/**
 * Add an attribute name to the set of "all" attributes for this search.
 *
 * NOTE(review): the set is only stored here and exposed via
 * getAllAttributes(); confirm its exact role against the query builder.
 *
 * @param attribute the attribute (field) name to add
 */
public void addAllAttribute(String attribute)
{
allAttributes.add(attribute);
}
/**
 * Get the attribute names added via {@link #addAllAttribute(String)}.
 *
 * @return an unmodifiable view of the "all" attribute names
 */
public Set<String> getAllAttributes()
{
return Collections.unmodifiableSet(allAttributes);
}
/**
* A helper class for sort definition. Encapsulated using the lucene sortType, field name and a flag for
* ascending/descending.