Next stage of multilingual searches

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4609 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2006-12-14 16:09:43 +00:00
parent fee38b6eb3
commit 9a5df4b303
27 changed files with 1365 additions and 554 deletions

View File

@@ -13,30 +13,144 @@ public enum MLAnalysisMode
/** /**
* Only exact locale is used. * Only exact locale is used.
*/ */
LOCALE_ONLY, LOCALE_ONLY
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
},
/** /**
* Only the exact locale and no locale === all languages * Only the exact locale and no locale === all languages
*/ */
LOCALE_AND_ALL, LOCALE_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
},
/** /**
* Expand the locale to include all the locales that contain it. * Expand the locale to include all the locales that contain it.
* en_GB would be en_GB, en, but not all languages * en_GB would be en_GB, en, but not all languages
*/ */
LOCALE_AND_ALL_CONTAINING_LOCALES, LOCALE_AND_ALL_CONTAINING_LOCALES
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return true;
}
public boolean includesExact()
{
return true;
}
},
/** /**
* Expand the locale to include all the locales that contain it. * Expand the locale to include all the locales that contain it.
* en_GB would be en_GB, en, and all. * en_GB would be en_GB, en, and all.
*/ */
LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return true;
}
public boolean includesExact()
{
return true;
}
},
/** /**
* Expand to all the locales that are contained by this. * Expand to all the locales that are contained by this.
* en would expand to en, en_GB, en_US, .... * en would expand to en, en_GB, en_US, ....
*/ */
LOCAL_AND_ALL_CONTAINED_LOCALES; LOCALE_AND_ALL_CONTAINED_LOCALES
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return true;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
},
/**
* No prefix only
*/
ALL_ONLY
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
};
public static MLAnalysisMode getMLAnalysisMode(String mode) public static MLAnalysisMode getMLAnalysisMode(String mode)
{ {
@@ -49,4 +163,13 @@ public enum MLAnalysisMode
} }
throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode); throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode);
} }
public abstract boolean includesAll();
public abstract boolean includesContained();
public abstract boolean includesContaining();
public abstract boolean includesExact();
} }

View File

@@ -17,7 +17,8 @@ package org.alfresco.repo.search.impl.lucene;
* limitations under the License. * limitations under the License.
*/ */
import java.io.*; import java.io.IOException;
import java.io.Reader;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the * this does not do line-number counting, but instead keeps track of the

View File

@@ -21,21 +21,23 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser; import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser;
import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser; import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser; import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition; import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
/** /**
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser
* require no tokenisation. (tokenise should be set to false when adding the field to the document) * should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false
* when adding the field to the document)
* *
* @author andyh * @author andyh
*/ */
@@ -82,7 +84,7 @@ public class LuceneAnalyser extends Analyzer
// Treat multilingual as a special case. // Treat multilingual as a special case.
// If multilingual then we need to find the correct tokeniser. // If multilingual then we need to find the correct tokeniser.
// This is done dynamically by reading a language code at the start of the reader. // This is done dynamically by reading a language code at the start of the reader.
if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype")) if (fieldName.startsWith("@"))
{ {
QName propertyQName = QName.createQName(fieldName.substring(1)); QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
@@ -90,7 +92,7 @@ public class LuceneAnalyser extends Analyzer
{ {
if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)) if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))
{ {
MLAnalayser analyser = new MLAnalayser(dictionaryService); MLAnalayser analyser = new MLAnalayser(dictionaryService, mlAlaysisMode);
return analyser.tokenStream(fieldName, reader); return analyser.tokenStream(fieldName, reader);
} }
} }
@@ -133,17 +135,20 @@ public class LuceneAnalyser extends Analyzer
{ {
analyser = new WhitespaceAnalyzer(); analyser = new WhitespaceAnalyzer();
} }
else if (fieldName.equals("TEXT"))
{
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
analyser = loadAnalyzer(dataType);
}
else if (fieldName.startsWith("@")) else if (fieldName.startsWith("@"))
{ {
if (fieldName.endsWith(".mimetype")) if (fieldName.endsWith(".mimetype"))
{ {
analyser = new VerbatimAnalyser(); analyser = new VerbatimAnalyser();
} }
else if (fieldName.endsWith(".size"))
{
analyser = new LongAnalyser();
}
else if (fieldName.endsWith(".locale"))
{
analyser = new VerbatimAnalyser(true);
}
else else
{ {
QName propertyQName = QName.createQName(fieldName.substring(1)); QName propertyQName = QName.createQName(fieldName.substring(1));
@@ -153,7 +158,18 @@ public class LuceneAnalyser extends Analyzer
if (propertyDef.isTokenisedInIndex()) if (propertyDef.isTokenisedInIndex())
{ {
DataTypeDefinition dataType = propertyDef.getDataType(); DataTypeDefinition dataType = propertyDef.getDataType();
analyser = loadAnalyzer(dataType); if (dataType.getName().equals(DataTypeDefinition.CONTENT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else
{
analyser = loadAnalyzer(dataType);
}
} }
else else
{ {
@@ -208,7 +224,8 @@ public class LuceneAnalyser extends Analyzer
} }
/** /**
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc. * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different
* languages etc.
*/ */
@Override @Override
public int getPositionIncrementGap(String fieldName) public int getPositionIncrementGap(String fieldName)

View File

@@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Set; import java.util.Set;
import org.alfresco.repo.search.IndexerException; import org.alfresco.repo.search.IndexerException;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo; import org.alfresco.repo.search.impl.lucene.index.IndexInfo;
import org.alfresco.repo.search.impl.lucene.index.TransactionStatus; import org.alfresco.repo.search.impl.lucene.index.TransactionStatus;
import org.alfresco.repo.search.impl.lucene.index.IndexInfo.LockWork; import org.alfresco.repo.search.impl.lucene.index.IndexInfo.LockWork;
@@ -32,7 +31,6 @@ import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
/** /**
* Common support for abstracting the lucene indexer from its configuration and management requirements. * Common support for abstracting the lucene indexer from its configuration and management requirements.

View File

@@ -40,7 +40,6 @@ import org.alfresco.service.cmr.search.ResultSetRow;
import org.alfresco.service.namespace.NamespacePrefixResolver; import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.alfresco.util.ISO9075; import org.alfresco.util.ISO9075;
import org.bouncycastle.crypto.paddings.ISO7816d4Padding;
public class LuceneCategoryServiceImpl implements CategoryService public class LuceneCategoryServiceImpl implements CategoryService
{ {

View File

@@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private String lockDirectory; private String lockDirectory;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_ONLY; private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL; private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL;
/** /**
* Private constructor for the singleton TODO: Fit in with IoC * Private constructor for the singleton TODO: Fit in with IoC

View File

@@ -19,7 +19,9 @@ package org.alfresco.repo.search.impl.lucene;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@@ -85,7 +87,6 @@ import org.apache.lucene.search.BooleanClause.Occur;
* The implementation of the lucene based indexer. Supports basic transactional behaviour if used on its own. * The implementation of the lucene based indexer. Supports basic transactional behaviour if used on its own.
* *
* @author andyh * @author andyh
*
*/ */
public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
{ {
@@ -119,9 +120,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
private long maxAtomicTransformationTime = 20; private long maxAtomicTransformationTime = 20;
/** /**
* A list of all deletions we have made - at merge these deletions need to be made against the main index. * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO:
* * Consider if this information needs to be persisted for recovery
* TODO: Consider if this information needs to be persisted for recovery
*/ */
private Set<NodeRef> deletions = new LinkedHashSet<NodeRef>(); private Set<NodeRef> deletions = new LinkedHashSet<NodeRef>();
@@ -140,8 +140,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
private boolean isModified = false; private boolean isModified = false;
/** /**
* Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just * Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just
* updates. * fixing up non atomically indexed things from one or more other updates.
*/ */
private Boolean isFTSUpdate = null; private Boolean isFTSUpdate = null;
@@ -168,7 +168,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
/** /**
* Default construction * Default construction
*
*/ */
LuceneIndexerImpl2() LuceneIndexerImpl2()
{ {
@@ -216,7 +215,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
/** /**
* Utility method to check we are in the correct state to do work Also keeps track of the dirty flag. * Utility method to check we are in the correct state to do work Also keeps track of the dirty flag.
*
*/ */
private void checkAbleToDoWork(boolean isFTS, boolean isModified) private void checkAbleToDoWork(boolean isFTS, boolean isModified)
@@ -508,7 +506,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
case Status.STATUS_ACTIVE: case Status.STATUS_ACTIVE:
// special case - commit from active // special case - commit from active
prepare(); prepare();
// drop through to do the commit; // drop through to do the commit;
default: default:
if (status != Status.STATUS_PREPARED) if (status != Status.STATUS_PREPARED)
{ {
@@ -585,37 +583,36 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
for (Helper helper : toFTSIndex) for (Helper helper : toFTSIndex)
{ {
//BooleanQuery query = new BooleanQuery(); // BooleanQuery query = new BooleanQuery();
//query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false); // query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false);
//query.add(new TermQuery(new Term("TX", helper.tx)), true, false); // query.add(new TermQuery(new Term("TX", helper.tx)), true, false);
//query.add(new TermQuery(new Term("ISNODE", "T")), false, false); // query.add(new TermQuery(new Term("ISNODE", "T")), false, false);
deletions.add(helper.nodeRef); deletions.add(helper.nodeRef);
// try
// try // {
// { // Hits hits = mainSearcher.search(query);
// Hits hits = mainSearcher.search(query); // if (hits.length() > 0)
// if (hits.length() > 0) // {
// { // for (int i = 0; i < hits.length(); i++)
// for (int i = 0; i < hits.length(); i++) // {
// { // mainReader.delete(hits.id(i));
// mainReader.delete(hits.id(i)); // }
// } // }
// } // else
// else // {
// { // hits = deltaSearcher.search(query);
// hits = deltaSearcher.search(query); // for (int i = 0; i < hits.length(); i++)
// for (int i = 0; i < hits.length(); i++) // {
// { // deltaReader.delete(hits.id(i));
// deltaReader.delete(hits.id(i)); // }
// } // }
// } // }
// } // catch (IOException e)
// catch (IOException e) // {
// { // throw new LuceneIndexException("Failed to delete an FTS update from the original index", e);
// throw new LuceneIndexException("Failed to delete an FTS update from the original index", e); // }
// }
} }
} }
@@ -688,11 +685,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
} }
/** /**
* Prepare to commit * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper
* * serialisation against the index as would a data base transaction.
* At the moment this makes sure we have all the locks
*
* TODO: This is not doing proper serialisation against the index as would a data base transaction.
* *
* @return * @return
*/ */
@@ -766,7 +760,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
/** /**
* Roll back the index changes (this just means they are never added) * Roll back the index changes (this just means they are never added)
*
*/ */
public void rollback() throws LuceneIndexException public void rollback() throws LuceneIndexException
@@ -781,7 +774,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
case Status.STATUS_ROLLEDBACK: case Status.STATUS_ROLLEDBACK:
throw new IndexerException("Unable to roll back: Transaction is already rolled back"); throw new IndexerException("Unable to roll back: Transaction is already rolled back");
case Status.STATUS_COMMITTING: case Status.STATUS_COMMITTING:
// Can roll back during commit // Can roll back during commit
default: default:
status = Status.STATUS_ROLLING_BACK; status = Status.STATUS_ROLLING_BACK;
// if (isModified()) // if (isModified())
@@ -807,8 +800,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
} }
/** /**
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back. * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow
* * roll back.
*/ */
public void setRollbackOnly() public void setRollbackOnly()
@@ -1242,7 +1235,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
Document xdoc = new Document(); Document xdoc = new Document();
xdoc.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("TX", nodeStatus.getChangeTxnId(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("TX", nodeStatus.getChangeTxnId(), Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
boolean isAtomic = true; boolean isAtomic = true;
for (QName propertyName : properties.keySet()) for (QName propertyName : properties.keySet())
{ {
@@ -1298,8 +1292,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
qNameBuffer.append(";/"); qNameBuffer.append(";/");
} }
qNameBuffer.append(ISO9075.getXPathName(qNameRef.getQName())); qNameBuffer.append(ISO9075.getXPathName(qNameRef.getQName()));
xdoc.add(new Field("PARENT", qNameRef.getParentRef().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("PARENT", qNameRef.getParentRef().toString(), Field.Store.YES,
xdoc.add(new Field("ASSOCTYPEQNAME", ISO9075.getXPathName(qNameRef.getTypeQName()), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ASSOCTYPEQNAME", ISO9075.getXPathName(qNameRef.getTypeQName()),
Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("LINKASPECT", (pair.getSecond() == null) ? "" : ISO9075.getXPathName(pair xdoc.add(new Field("LINKASPECT", (pair.getSecond() == null) ? "" : ISO9075.getXPathName(pair
.getSecond()), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); .getSecond()), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
} }
@@ -1322,17 +1318,22 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
if (directPaths.contains(pair.getFirst())) if (directPaths.contains(pair.getFirst()))
{ {
Document directoryEntry = new Document(); Document directoryEntry = new Document();
directoryEntry.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); directoryEntry.add(new Field("ID", nodeRef.toString(), Field.Store.YES,
directoryEntry.add(new Field("PATH", pathString, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); Field.Index.UN_TOKENIZED, Field.TermVector.NO));
directoryEntry.add(new Field("PATH", pathString, Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
for (NodeRef parent : getParents(pair.getFirst())) for (NodeRef parent : getParents(pair.getFirst()))
{ {
directoryEntry.add(new Field("ANCESTOR", parent.toString(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); directoryEntry.add(new Field("ANCESTOR", parent.toString(), Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
} }
directoryEntry.add(new Field("ISCONTAINER", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); directoryEntry.add(new Field("ISCONTAINER", "T", Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
if (isCategory(getDictionaryService().getType(nodeService.getType(nodeRef)))) if (isCategory(getDictionaryService().getType(nodeService.getType(nodeRef))))
{ {
directoryEntry.add(new Field("ISCATEGORY", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); directoryEntry.add(new Field("ISCATEGORY", "T", Field.Store.YES,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
} }
docs.add(directoryEntry); docs.add(directoryEntry);
@@ -1350,7 +1351,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
xdoc.add(new Field("PATH", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("PATH", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("QNAME", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("QNAME", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ISROOT", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ISROOT", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(ContentModel.ASSOC_CHILDREN), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(ContentModel.ASSOC_CHILDREN),
Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
docs.add(xdoc); docs.add(xdoc);
@@ -1358,36 +1360,45 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
else else
// not a root node // not a root node
{ {
xdoc.add(new Field("QNAME", qNameBuffer.toString(),Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("QNAME", qNameBuffer.toString(), Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
// xdoc.add(new Field("PARENT", parentBuffer.toString(), true, true, // xdoc.add(new Field("PARENT", parentBuffer.toString(), true, true,
// true)); // true));
ChildAssociationRef primary = nodeService.getPrimaryParent(nodeRef); ChildAssociationRef primary = nodeService.getPrimaryParent(nodeRef);
xdoc.add(new Field("PRIMARYPARENT", primary.getParentRef().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("PRIMARYPARENT", primary.getParentRef().toString(), Field.Store.YES,
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(primary.getTypeQName()), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(primary.getTypeQName()), Field.Store.YES,
Field.Index.NO, Field.TermVector.NO));
QName typeQName = nodeService.getType(nodeRef); QName typeQName = nodeService.getType(nodeRef);
xdoc.add(new Field("TYPE", ISO9075.getXPathName(typeQName), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("TYPE", ISO9075.getXPathName(typeQName), Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
for (QName classRef : nodeService.getAspects(nodeRef)) for (QName classRef : nodeService.getAspects(nodeRef))
{ {
xdoc.add(new Field("ASPECT", ISO9075.getXPathName(classRef), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ASPECT", ISO9075.getXPathName(classRef), Field.Store.YES, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
} }
xdoc.add(new Field("ISROOT", "F", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ISROOT", "F", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
if (isAtomic || indexAllProperties) if (isAtomic || indexAllProperties)
{ {
xdoc.add(new Field("FTSSTATUS", "Clean", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc
.add(new Field("FTSSTATUS", "Clean", Field.Store.NO, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
} }
else else
{ {
if (isNew) if (isNew)
{ {
xdoc.add(new Field("FTSSTATUS", "New", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("FTSSTATUS", "New", Field.Store.NO, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
} }
else else
{ {
xdoc.add(new Field("FTSSTATUS", "Dirty", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("FTSSTATUS", "Dirty", Field.Store.NO, Field.Index.UN_TOKENIZED,
Field.TermVector.NO));
} }
} }
@@ -1446,6 +1457,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
boolean atomic = true; boolean atomic = true;
boolean isContent = false; boolean isContent = false;
boolean isMultiLingual = false; boolean isMultiLingual = false;
boolean isText = false;
PropertyDefinition propertyDef = getDictionaryService().getProperty(propertyName); PropertyDefinition propertyDef = getDictionaryService().getProperty(propertyName);
if (propertyDef != null) if (propertyDef != null)
@@ -1456,6 +1468,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
atomic = propertyDef.isIndexedAtomically(); atomic = propertyDef.isIndexedAtomically();
isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT); isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT);
isMultiLingual = propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT); isMultiLingual = propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT);
isText = propertyDef.getDataType().getName().equals(DataTypeDefinition.TEXT);
} }
if (value == null) if (value == null)
{ {
@@ -1493,7 +1506,19 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
continue; continue;
} }
// store mimetype in index - even if content does not index it is useful // store mimetype in index - even if content does not index it is useful
doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); // Added size and locale - size needs to be tokenised correctly
doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName + ".size", Long.toString(contentData.getSize()), Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
// TODO: Use the node locale in preference to the system locale
Locale locale = contentData.getLocale();
if (locale == null)
{
locale = Locale.getDefault();
}
doc.add(new Field(attributeName + ".locale", locale.toString().toLowerCase(), Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
ContentReader reader = contentService.getReader(nodeRef, propertyName); ContentReader reader = contentService.getReader(nodeRef, propertyName);
if (reader != null && reader.exists()) if (reader != null && reader.exists())
@@ -1519,8 +1544,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
// don't index from the reader // don't index from the reader
readerReady = false; readerReady = false;
// not indexed: no transformation // not indexed: no transformation
//doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); // doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO,
doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); // Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
} }
else if (indexAtomicPropertiesOnly else if (indexAtomicPropertiesOnly
&& transformer.getTransformationTime() > maxAtomicTransformationTime) && transformer.getTransformationTime() > maxAtomicTransformationTime)
@@ -1554,8 +1581,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
readerReady = false; readerReady = false;
// not indexed: transformation // not indexed: transformation
// failed // failed
//doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); // doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO,
doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); // Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
} }
} }
} }
@@ -1564,16 +1593,16 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
if (readerReady) if (readerReady)
{ {
InputStreamReader isr = null; InputStreamReader isr = null;
//InputStream ris = reader.getContentInputStream(); // InputStream ris = reader.getContentInputStream();
//try // try
//{ // {
// isr = new InputStreamReader(ris, "UTF-8"); // isr = new InputStreamReader(ris, "UTF-8");
// } // }
//catch (UnsupportedEncodingException e) // catch (UnsupportedEncodingException e)
// { // {
// isr = new InputStreamReader(ris); // isr = new InputStreamReader(ris);
//} // }
//doc.add(new Field("TEXT", isr, Field.TermVector.NO)); // doc.add(new Field("TEXT", isr, Field.TermVector.NO));
InputStream ris = reader.getReader().getContentInputStream(); InputStream ris = reader.getReader().getContentInputStream();
try try
@@ -1584,10 +1613,11 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
{ {
isr = new InputStreamReader(ris); isr = new InputStreamReader(ris);
} }
StringBuilder builder = new StringBuilder();
doc.add(new Field("@" builder.append("\u0000").append(locale.toString()).append("\u0000");
+ QName.createQName(propertyName.getNamespaceURI(), ISO9075.encode(propertyName StringReader prefix = new StringReader(builder.toString());
.getLocalName())), isr, Field.TermVector.NO)); Reader multiReader = new MultiReader(prefix, isr);
doc.add(new Field(attributeName, multiReader, Field.TermVector.NO));
} }
} }
else else
@@ -1601,17 +1631,19 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
+ (reader == null ? " --- " : Boolean.toString(reader.exists()))); + (reader == null ? " --- " : Boolean.toString(reader.exists())));
} }
// not indexed: content missing // not indexed: content missing
doc.add(new Field("TEXT", NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); doc.add(new Field("TEXT", NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED,
doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); Field.TermVector.NO));
doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO,
Field.Index.TOKENIZED, Field.TermVector.NO));
} }
} }
else else
{ {
Field.Store fieldStore = store ? Field.Store.YES : Field.Store.NO; Field.Store fieldStore = store ? Field.Store.YES : Field.Store.NO;
Field.Index fieldIndex; Field.Index fieldIndex;
if(index ) if (index)
{ {
if(tokenise) if (tokenise)
{ {
fieldIndex = Field.Index.TOKENIZED; fieldIndex = Field.Index.TOKENIZED;
} }
@@ -1624,21 +1656,33 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
{ {
fieldIndex = Field.Index.NO; fieldIndex = Field.Index.NO;
} }
if(isMultiLingual) if (isMultiLingual)
{ {
MLText mlText = DefaultTypeConverter.INSTANCE.convert(MLText.class, value); MLText mlText = DefaultTypeConverter.INSTANCE.convert(MLText.class, value);
for(Locale locale : mlText.getLocales()) for (Locale locale : mlText.getLocales())
{ {
String localeString = mlText.getValue(locale); String localeString = mlText.getValue(locale);
doc.add(new Field(attributeName, "\u0000" + locale.toString() +"\u0000" + localeString, fieldStore, fieldIndex, Field.TermVector.NO)); StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
Field.TermVector.NO));
} }
} }
else if(isText)
{
// TODO: Use the node locale in preferanced to the system locale
Locale locale = Locale.getDefault();
StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
Field.TermVector.NO));
}
else else
{ {
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
} }
} }
} }
@@ -2000,19 +2044,14 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
public boolean getDeleteOnlyNodes() public boolean getDeleteOnlyNodes()
{ {
if(isFTSUpdate != null) if (isFTSUpdate != null)
{ {
return isFTSUpdate.booleanValue(); return isFTSUpdate.booleanValue();
} }
else else
{ {
return false; return false;
} }
} }
} }

View File

@@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Set;
import org.alfresco.i18n.I18NUtil; import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.SearcherException;
@@ -32,6 +33,7 @@ import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition; import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition; import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.namespace.NamespacePrefixResolver; import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@@ -55,7 +57,7 @@ public class LuceneQueryParser extends QueryParser
private DictionaryService dictionaryService; private DictionaryService dictionaryService;
private List<Locale> locales; private SearchParameters searchParameters;
/** /**
* Parses a query string, returning a {@link org.apache.lucene.search.Query}. * Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -71,7 +73,7 @@ public class LuceneQueryParser extends QueryParser
*/ */
static public Query parse(String query, String field, Analyzer analyzer, static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
Operator defaultOperator, List<Locale> locales) throws ParseException Operator defaultOperator, SearchParameters searchParameters) throws ParseException
{ {
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
{ {
@@ -81,14 +83,19 @@ public class LuceneQueryParser extends QueryParser
parser.setDefaultOperator(defaultOperator); parser.setDefaultOperator(defaultOperator);
parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService); parser.setDictionaryService(dictionaryService);
parser.setLocales(locales); parser.setSearchParameters(searchParameters);
// TODO: Apply locale contstraints at the top level if required for the non ML doc types. // TODO: Apply locale contstraints at the top level if required for the non ML doc types.
return parser.parse(query); Query result = parser.parse(query);
if (s_logger.isDebugEnabled())
{
s_logger.debug("Query " + query + " is\n\t" + result.toString());
}
return result;
} }
private void setLocales(List<Locale> locales) private void setSearchParameters(SearchParameters searchParameters)
{ {
this.locales = locales; this.searchParameters = searchParameters;
} }
public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver) public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver)
@@ -141,15 +148,31 @@ public class LuceneQueryParser extends QueryParser
} }
else if (field.equals("TEXT")) else if (field.equals("TEXT"))
{ {
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT); Set<String> text = searchParameters.getTextAttributes();
BooleanQuery query = new BooleanQuery(); if ((text == null) || (text.size() == 0))
for (QName qname : contentAttributes)
{ {
// The super implementation will create phrase queries etc if required Collection<QName> contentAttributes = dictionaryService
Query part = super.getFieldQuery("@" + qname.toString(), queryText); .getAllProperties(DataTypeDefinition.CONTENT);
query.add(part, Occur.SHOULD); BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD);
}
return query;
} }
return query; else
{
BooleanQuery query = new BooleanQuery();
for (String fieldName : text)
{
Query part = getFieldQuery(fieldName, queryText);
query.add(part, Occur.SHOULD);
}
return query;
}
} }
else if (field.equals("ID")) else if (field.equals("ID"))
{ {
@@ -232,6 +255,39 @@ public class LuceneQueryParser extends QueryParser
} }
return booleanQuery; return booleanQuery;
} }
else if (field.equals("EXACTTYPE"))
{
TypeDefinition target;
if (queryText.startsWith("{"))
{
target = dictionaryService.getType(QName.createQName(queryText));
}
else
{
int colonPosition = queryText.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver
.getNamespaceURI(""), queryText));
}
else
{
// find the prefix
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver
.getNamespaceURI(queryText.substring(0, colonPosition)), queryText
.substring(colonPosition + 1)));
}
}
if (target == null)
{
throw new SearcherException("Invalid type: " + queryText);
}
QName targetQName = target.getName();
TermQuery termQuery = new TermQuery(new Term("TYPE", targetQName.toString()));
return termQuery;
}
else if (field.equals("ASPECT")) else if (field.equals("ASPECT"))
{ {
AspectDefinition target; AspectDefinition target;
@@ -281,100 +337,133 @@ public class LuceneQueryParser extends QueryParser
} }
return booleanQuery; return booleanQuery;
} }
else if (field.startsWith("@")) else if (field.equals("EXACTASPECT"))
{ {
// Expand prefixes AspectDefinition target;
if (queryText.startsWith("{"))
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{ {
int colonPosition = field.indexOf(':'); target = dictionaryService.getAspect(QName.createQName(queryText));
}
else
{
int colonPosition = queryText.indexOf(':');
if (colonPosition == -1) if (colonPosition == -1)
{ {
// use the default namespace // use the default namespace
expandedFieldName = "@{" target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver
+ namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1); .getNamespaceURI(""), queryText));
} }
else else
{ {
// find the prefix // find the prefix
expandedFieldName = "@{" target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" .getNamespaceURI(queryText.substring(0, colonPosition)), queryText
+ field.substring(colonPosition + 1); .substring(colonPosition + 1)));
} }
} }
// Mime type QName targetQName = target.getName();
if (expandedFieldName.endsWith(".mimetype")) TermQuery termQuery = new TermQuery(new Term("ASPECT", targetQName.toString()));
return termQuery;
}
else if (field.startsWith("@"))
{
return attributeQueryBuilder(field, queryText, new FieldQuery());
}
else if (field.equals("ALL"))
{
Set<String> all = searchParameters.getAllAttributes();
if ((all == null) || (all.size() == 0))
{ {
QName propertyQName = QName.createQName(expandedFieldName.substring(1, Collection<QName> contentAttributes = dictionaryService.getAllProperties(null);
expandedFieldName.length() - 9)); BooleanQuery query = new BooleanQuery();
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); for (QName qname : contentAttributes)
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{ {
return super.getFieldQuery(expandedFieldName, queryText); // The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query;
}
else if (expandedFieldName.endsWith(".size"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1,
expandedFieldName.length() - 5));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFieldQuery(expandedFieldName, queryText);
}
}
else if (expandedFieldName.endsWith(".locale"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1,
expandedFieldName.length() - 7));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null)
&& (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFieldQuery(expandedFieldName, queryText);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections
.singletonList(I18NUtil.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = super.getFieldQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
} }
else else
{ {
return super.getFieldQuery(expandedFieldName, queryText); BooleanQuery query = new BooleanQuery();
for (String fieldName : all)
{
Query part = getFieldQuery(fieldName, queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
} }
} }
else if (field.equals("ISNULL"))
{
String qnameString = expandFieldName(queryText);
QName qname = QName.createQName(qnameString);
PropertyDefinition pd = dictionaryService.getProperty(qname);
if (pd != null)
{
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
query.add(presenceQuery, Occur.MUST_NOT);
return query;
}
else
{
return super.getFieldQuery(field, queryText);
}
}
else if (field.equals("ISNOTNULL"))
{
String qnameString = expandFieldName(queryText);
QName qname = QName.createQName(qnameString);
PropertyDefinition pd = dictionaryService.getProperty(qname);
if (pd != null)
{
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
query.add(presenceQuery, Occur.MUST);
return query;
}
else
{
return super.getFieldQuery(field, queryText);
}
}
else if (dictionaryService.getDataType(QName.createQName(expandFieldName(field))) != null)
{
Collection<QName> contentAttributes = dictionaryService.getAllProperties(dictionaryService.getDataType(
QName.createQName(expandFieldName(field))).getName());
BooleanQuery query = new BooleanQuery();
for (QName qname : contentAttributes)
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD);
}
return query;
}
else else
{ {
return super.getFieldQuery(field, queryText); return super.getFieldQuery(field, queryText);
} }
} }
catch (SAXPathException e) catch (SAXPathException e)
{ {
@@ -391,24 +480,7 @@ public class LuceneQueryParser extends QueryParser
{ {
if (field.startsWith("@")) if (field.startsWith("@"))
{ {
String fieldName = field; String fieldName = expandAttributeFieldName(field);
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
fieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
fieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
return new RangeQuery(new Term(fieldName, getToken(fieldName, part1)), new Term(fieldName, getToken( return new RangeQuery(new Term(fieldName, getToken(fieldName, part1)), new Term(fieldName, getToken(
fieldName, part2)), inclusive); fieldName, part2)), inclusive);
@@ -420,6 +492,52 @@ public class LuceneQueryParser extends QueryParser
} }
private String expandAttributeFieldName(String field)
{
String fieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
fieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
fieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
return fieldName;
}
private String expandFieldName(String field)
{
String fieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(0) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
fieldName = "{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field;
}
else
{
// find the prefix
fieldName = "{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(0, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
return fieldName;
}
private String getToken(String field, String value) private String getToken(String field, String value)
{ {
TokenStream source = analyzer.tokenStream(field, new StringReader(value)); TokenStream source = analyzer.tokenStream(field, new StringReader(value));
@@ -457,67 +575,8 @@ public class LuceneQueryParser extends QueryParser
{ {
if (field.startsWith("@")) if (field.startsWith("@"))
{ {
// Expand prefixes return attributeQueryBuilder(field, termStr, new PrefixQuery());
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getPrefixQuery(expandedFieldName, termStr);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getPrefixQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getPrefixQuery(expandedFieldName, termStr);
}
} }
else if (field.equals("TEXT")) else if (field.equals("TEXT"))
{ {
Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT); Collection<QName> contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT);
@@ -525,15 +584,14 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : contentAttributes) for (QName qname : contentAttributes)
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = super.getPrefixQuery("@" + qname.toString(), termStr); Query part = getPrefixQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD); query.add(part, Occur.SHOULD);
} }
return query; return query;
} }
else else
{ {
return super.getFieldQuery(field, termStr); return super.getPrefixQuery(field, termStr);
} }
} }
@@ -542,65 +600,7 @@ public class LuceneQueryParser extends QueryParser
{ {
if (field.startsWith("@")) if (field.startsWith("@"))
{ {
// Expand prefixes return attributeQueryBuilder(field, termStr, new WildcardQuery());
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getWildcardQuery(expandedFieldName, termStr);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getWildcardQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getWildcardQuery(expandedFieldName, termStr);
}
} }
else if (field.equals("TEXT")) else if (field.equals("TEXT"))
@@ -610,11 +610,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : contentAttributes) for (QName qname : contentAttributes)
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = super.getWildcardQuery("@" + qname.toString(), termStr); Query part = getWildcardQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD); query.add(part, Occur.SHOULD);
} }
return query; return query;
} }
else else
{ {
@@ -627,65 +626,7 @@ public class LuceneQueryParser extends QueryParser
{ {
if (field.startsWith("@")) if (field.startsWith("@"))
{ {
// Expand prefixes return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity));
String expandedFieldName = field;
// Check for any prefixes and expand to the full uri
if (field.charAt(1) != '{')
{
int colonPosition = field.indexOf(':');
if (colonPosition == -1)
{
// use the default namespace
expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1);
}
else
{
// find the prefix
expandedFieldName = "@{"
+ namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}"
+ field.substring(colonPosition + 1);
}
}
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(termStr.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr);
Query subQuery = super.getFuzzyQuery(expandedFieldName, builder.toString(), minSimilarity);
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity);
}
} }
else if (field.equals("TEXT")) else if (field.equals("TEXT"))
@@ -695,11 +636,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : contentAttributes) for (QName qname : contentAttributes)
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = super.getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity); Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
query.add(part, Occur.SHOULD); query.add(part, Occur.SHOULD);
} }
return query; return query;
} }
else else
{ {
@@ -712,4 +652,155 @@ public class LuceneQueryParser extends QueryParser
this.dictionaryService = dictionaryService; this.dictionaryService = dictionaryService;
} }
public Query getSuperFieldQuery(String field, String queryText) throws ParseException
{
return super.getFieldQuery(field, queryText);
}
public Query getSuperFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
return super.getFuzzyQuery(field, termStr, minSimilarity);
}
public Query getSuperPrefixQuery(String field, String termStr) throws ParseException
{
return super.getPrefixQuery(field, termStr);
}
public Query getSuperWildcardQuery(String field, String termStr) throws ParseException
{
return super.getWildcardQuery(field, termStr);
}
interface SubQuery
{
Query getQuery(String field, String queryText) throws ParseException;
}
class FieldQuery implements SubQuery
{
public Query getQuery(String field, String queryText) throws ParseException
{
return getSuperFieldQuery(field, queryText);
}
}
class FuzzyQuery implements SubQuery
{
float minSimilarity;
FuzzyQuery(float minSimilarity)
{
this.minSimilarity = minSimilarity;
}
public Query getQuery(String field, String termStr) throws ParseException
{
return getSuperFuzzyQuery(field, termStr, minSimilarity);
}
}
class PrefixQuery implements SubQuery
{
public Query getQuery(String field, String termStr) throws ParseException
{
return getSuperPrefixQuery(field, termStr);
}
}
class WildcardQuery implements SubQuery
{
public Query getQuery(String field, String termStr) throws ParseException
{
return getSuperWildcardQuery(field, termStr);
}
}
private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder) throws ParseException
{
// Expand prefixes
String expandedFieldName = expandAttributeFieldName(field);
// Mime type
if (expandedFieldName.endsWith(".mimetype"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
}
else if (expandedFieldName.endsWith(".size"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 5));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
}
else if (expandedFieldName.endsWith(".locale"))
{
QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 7));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
}
// Already in expanded form
// ML
QName propertyQName = QName.createQName(expandedFieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)))
{
// Build a sub query for each locale and or the results together - the analysis will take care of
// cross language matching for each entry
BooleanQuery booleanQuery = new BooleanQuery();
List<Locale> locales = searchParameters.getLocales();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
// Content
else if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)))
{
// Build a sub query for each locale and or the results together -
// - add an explicit condition for the locale
BooleanQuery booleanQuery = new BooleanQuery();
List<Locale> locales = searchParameters.getLocales();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
BooleanQuery subQuery = new BooleanQuery();
Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
subQuery.add(contentQuery, Occur.MUST);
StringBuilder builder = new StringBuilder();
builder.append(expandedFieldName).append(".locale");
Query localeQuery = getFieldQuery(builder.toString(), locale.toString());
subQuery.add(localeQuery, Occur.MUST);
booleanQuery.add(subQuery, Occur.SHOULD);
}
return booleanQuery;
}
else
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
}
} }

View File

@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
} }
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters.getLocales()); dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters);
ClosingIndexSearcher searcher = getSearcher(indexer); ClosingIndexSearcher searcher = getSearcher(indexer);
if (searcher == null) if (searcher == null)
{ {

View File

@@ -42,6 +42,7 @@ import org.alfresco.repo.dictionary.DictionaryNamespaceComponent;
import org.alfresco.repo.dictionary.M2Model; import org.alfresco.repo.dictionary.M2Model;
import org.alfresco.repo.dictionary.NamespaceDAOImpl; import org.alfresco.repo.dictionary.NamespaceDAOImpl;
import org.alfresco.repo.node.BaseNodeServiceTest; import org.alfresco.repo.node.BaseNodeServiceTest;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.QueryParameterDefImpl; import org.alfresco.repo.search.QueryParameterDefImpl;
import org.alfresco.repo.search.QueryRegisterComponent; import org.alfresco.repo.search.QueryRegisterComponent;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
@@ -1895,6 +1896,16 @@ public class LuceneTest2 extends TestCase
+ testType.toPrefixString(namespacePrefixResolver) + "\"", null, null); + testType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + testType.toString() + "\"", null,
null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\""
+ testType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testSuperType.toString() + "\"", results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testSuperType.toString() + "\"",
null, null); null, null);
@@ -1905,6 +1916,16 @@ public class LuceneTest2 extends TestCase
+ testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null); + testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(13, results.length()); assertEquals(13, results.length());
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + testSuperType.toString() + "\"",
null, null);
assertEquals(12, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\""
+ testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null);
assertEquals(12, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\"" + testAspect.toString() + "\"", null, results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\"" + testAspect.toString() + "\"", null,
null); null);
@@ -1926,6 +1947,28 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
// Test for AR-384
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox AND TYPE:\""
+ ContentModel.PROP_CONTENT.toString() + "\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo AND TYPE:\""
+ ContentModel.PROP_CONTENT.toString() + "\"", null, null);
assertEquals(0, results.length());
results.close();
// Test stop words are equivalent
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over a lazy\"", null, null);
assertEquals(1, results.length());
results.close();
// FTS test // FTS test
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null);
@@ -1943,42 +1986,167 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:\"en_GB\"", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:en_*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:e*_GB", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"90\"", null, null);
assertEquals(1, results.length());
results.close();
QName queryQName = QName.createQName("alf:test1", namespacePrefixResolver); QName queryQName = QName.createQName("alf:test1", namespacePrefixResolver);
results = searcher.query(rootNodeRef.getStoreRef(), queryQName, null); results = searcher.query(rootNodeRef.getStoreRef(), queryQName, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
// Direct ML tests // Configuration of TEXT
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
SearchParameters sp = new SearchParameters(); SearchParameters sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("TEXT:\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("TEXT:\"fox\"");
sp.addTextAttribute("@"+ContentModel.PROP_NAME.toString());
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp.addTextAttribute("@"+ContentModel.PROP_CONTENT.toString());
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// ALL and its configuration
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ALL:\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ALL:\"fox\"");
sp.addAllAttribute("@"+ContentModel.PROP_NAME.toString());
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp.addAllAttribute("@"+ContentModel.PROP_CONTENT.toString());
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ALL:\"5.6\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Search by data type
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("d\\:double:\"5.6\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("d\\:content:\"fox\"");
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Direct ML tests
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp.addLocale(Locale.UK);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.ENGLISH);
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.UK);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.setMlAnalaysisMode(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES);
sp.addLocale(Locale.UK);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.ENGLISH);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane");
sp.addLocale(Locale.FRENCH); sp.addLocale(Locale.FRENCH);
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
@@ -1987,79 +2155,98 @@ public class LuceneTest2 extends TestCase
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":香蕉"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":香蕉");
sp.addLocale(Locale.CHINESE); sp.addLocale(Locale.CHINESE);
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banaan"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banaan");
sp.addLocale(new Locale("nl")); sp.addLocale(new Locale("nl"));
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane");
sp.addLocale(Locale.GERMAN); sp.addLocale(Locale.GERMAN);
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":μπανάνα"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":μπανάνα");
sp.addLocale(new Locale("el")); sp.addLocale(new Locale("el"));
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(Locale.ITALIAN); sp.addLocale(Locale.ITALIAN);
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":バナナ"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":バナナ");
sp.addLocale(new Locale("ja")); sp.addLocale(new Locale("ja"));
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":바나나"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":바나나");
sp.addLocale(new Locale("ko")); sp.addLocale(new Locale("ko"));
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana");
sp.addLocale(new Locale("pt")); sp.addLocale(new Locale("pt"));
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":банан"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":банан");
sp.addLocale(new Locale("ru")); sp.addLocale(new Locale("ru"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":plátano");
sp.addLocale(new Locale("es"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Test ISNULL/ISNOTNULL
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\"");
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
@@ -2067,18 +2254,35 @@ public class LuceneTest2 extends TestCase
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":plátano"); sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\"");
sp.addLocale(new Locale("es")); results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNOTNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\"");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("ISNOTNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\"");
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
// Test non field queries
// Test non field queries
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox", null, null); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox", null, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo*", null, null); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo*", null, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
@@ -2090,50 +2294,50 @@ public class LuceneTest2 extends TestCase
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*ox", null, null); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*ox", null, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":*ox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":*ox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":fox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":fo*", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":f*x", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":*ox", null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":fox",
null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":fo*",
null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":f*x",
null, null);
assertEquals(1, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@"
+ LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":*ox",
null, null);
assertEquals(1, results.length());
results.close();
// Parameters // Parameters
@@ -3661,6 +3865,7 @@ public class LuceneTest2 extends TestCase
DynamicNamespacePrefixResolver nspr = new DynamicNamespacePrefixResolver(null); DynamicNamespacePrefixResolver nspr = new DynamicNamespacePrefixResolver(null);
nspr.registerNamespace(NamespaceService.ALFRESCO_PREFIX, NamespaceService.ALFRESCO_URI); nspr.registerNamespace(NamespaceService.ALFRESCO_PREFIX, NamespaceService.ALFRESCO_URI);
nspr.registerNamespace(NamespaceService.CONTENT_MODEL_PREFIX, NamespaceService.CONTENT_MODEL_1_0_URI); nspr.registerNamespace(NamespaceService.CONTENT_MODEL_PREFIX, NamespaceService.CONTENT_MODEL_1_0_URI);
nspr.registerNamespace(NamespaceService.DICTIONARY_MODEL_PREFIX, NamespaceService.DICTIONARY_MODEL_1_0_URI);
nspr.registerNamespace("namespace", "namespace"); nspr.registerNamespace("namespace", "namespace");
nspr.registerNamespace("test", TEST_NAMESPACE); nspr.registerNamespace("test", TEST_NAMESPACE);
nspr.registerNamespace(NamespaceService.DEFAULT_PREFIX, defaultURI); nspr.registerNamespace(NamespaceService.DEFAULT_PREFIX, defaultURI);

View File

@@ -238,6 +238,26 @@
<stored>true</stored> <stored>true</stored>
<tokenised>true</tokenised> <tokenised>true</tokenised>
</index> </index>
</property>
<property name="test:null">
<type>d:text</type>
<mandatory>false</mandatory>
<multiple>false</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
<property name="test:path-ista">
<type>d:path</type>
<mandatory>false</mandatory>
<multiple>false</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property> </property>
</properties> </properties>
<mandatory-aspects> <mandatory-aspects>

View File

@@ -16,6 +16,8 @@ package org.alfresco.repo.search.impl.lucene;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
@@ -23,8 +25,6 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import java.util.Vector;
/** /**
* A QueryParser which constructs queries to search multiple fields. * A QueryParser which constructs queries to search multiple fields.
* *

View File

@@ -0,0 +1,93 @@
/**
*
*/
package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.Reader;
/**
 * A {@link Reader} that presents the contents of two readers as one
 * continuous stream: every character from the first reader, followed by
 * every character from the second.
 *
 * Bulk reads synchronize on the {@code lock} object inherited from
 * {@link Reader}, following the superclass convention.
 */
class MultiReader extends Reader
{
    /** Supplies the leading characters of the combined stream. */
    Reader first;

    /** Supplies the trailing characters once {@code first} is exhausted. */
    Reader second;

    /** True until the first reader reports end-of-stream. */
    boolean firstActive = true;

    /**
     * @param first  reader whose content comes first
     * @param second reader whose content follows
     */
    MultiReader(Reader first, Reader second)
    {
        this.first = first;
        this.second = second;
    }

    /**
     * Closes both underlying readers. Both close attempts are always made;
     * if both fail, the exception from the first reader is the one thrown.
     * (Previously, a failure closing the second reader silently discarded
     * the saved exception from the first.)
     */
    @Override
    public void close() throws IOException
    {
        IOException fromFirst = null;
        try
        {
            first.close();
        }
        catch (IOException e)
        {
            fromFirst = e;
        }
        try
        {
            second.close();
        }
        catch (IOException e)
        {
            // Only propagate the second failure when the first close succeeded.
            if (fromFirst == null)
            {
                throw e;
            }
        }
        if (fromFirst != null)
        {
            throw fromFirst;
        }
    }

    /**
     * Reads up to {@code len} characters into {@code cbuf} starting at
     * {@code off}, drawing from the first reader until it is exhausted and
     * then from the second.
     *
     * @return the number of characters copied, or -1 if both readers are
     *         at end-of-stream
     * @throws IndexOutOfBoundsException if the offset/length window does not
     *         fit inside {@code cbuf}
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException
    {
        synchronized (lock)
        {
            if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0))
            {
                throw new IndexOutOfBoundsException();
            }
            else if (len == 0)
            {
                return 0;
            }
            for (int i = 0; i < len; i++)
            {
                int c = readChar();
                if (c == -1)
                {
                    // End of both streams: report how many characters were
                    // copied, or -1 if none were.
                    return (i == 0) ? -1 : i;
                }
                cbuf[off + i] = (char) c;
            }
            return len;
        }
    }

    /**
     * Reads one character, switching permanently from the first reader to
     * the second when the first reports end-of-stream.
     */
    private int readChar() throws IOException
    {
        if (firstActive)
        {
            int c = first.read();
            if (c != -1)
            {
                return c;
            }
            firstActive = false;
        }
        return second.read();
    }
}

View File

@@ -0,0 +1,85 @@
package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import junit.framework.TestCase;
/**
 * Unit tests for {@link MultiReader}: single-character reads, chunked bulk
 * reads, and skipping across the boundary between the two wrapped readers.
 */
public class MultiReaderTest extends TestCase
{
    public MultiReaderTest()
    {
        super();
    }

    public MultiReaderTest(String arg0)
    {
        super(arg0);
    }

    /**
     * Reading one character at a time must yield the concatenation of both
     * underlying readers.
     */
    public void testMultiReader_single() throws IOException
    {
        String first = "my first string";
        String second = "another little string";

        Reader multiReader = new MultiReader(new StringReader(first), new StringReader(second));

        StringBuilder builder = new StringBuilder();
        int c;
        while ((c = multiReader.read()) != -1)
        {
            builder.append((char) c);
        }
        assertEquals(first + second, builder.toString());
    }

    /**
     * Bulk reads in every chunk size from 1 to 99 must each yield the full
     * concatenation. The original version reused one already-exhausted
     * reader (and a shared builder) across all chunk sizes, so only
     * chunk == 1 actually exercised the read path; fresh readers and a
     * fresh builder are now created for each chunk size.
     */
    public void testMultiReader_bits() throws IOException
    {
        String first = "my first string";
        String second = "another little string";

        for (int chunk = 1; chunk < 100; chunk++)
        {
            Reader multiReader = new MultiReader(new StringReader(first), new StringReader(second));
            StringBuilder builder = new StringBuilder();
            char[] c = new char[chunk];
            int i = 0;
            while (i != -1)
            {
                i = multiReader.read(c);
                for (int j = 0; j < i; j++)
                {
                    builder.append(c[j]);
                }
            }
            assertEquals(first + second, builder.toString());
        }
    }

    /**
     * skip() must advance the combined stream correctly, including when the
     * skip crosses from the first reader into the second.
     */
    public void testSkip() throws IOException
    {
        String first = "my first string";
        String second = "another little string";

        Reader multiReader = new MultiReader(new StringReader(first), new StringReader(second));

        multiReader.skip(3);
        String all = first + second;
        assertEquals(all.charAt(3), (char) multiReader.read());
        // Position after the read above is 4; skipping 15 lands at 19.
        multiReader.skip(15);
        assertEquals(all.charAt(3 + 15 + 1), (char) multiReader.read());
    }
}

View File

@@ -1,14 +1,31 @@
/* Generated By:JavaCC: Do not edit this line. QueryParser.java */ /* Generated By:JavaCC: Do not edit this line. QueryParser.java */
package org.alfresco.repo.search.impl.lucene; package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Vector; import java.util.Vector;
import java.io.*;
import java.text.*; import org.apache.lucene.analysis.Analyzer;
import java.util.*; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.document.*; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.*; import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter; import org.apache.lucene.util.Parameter;
/** /**

View File

@@ -1,14 +1,5 @@
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */ /* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.alfresco.repo.search.impl.lucene; package org.alfresco.repo.search.impl.lucene;
import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
public class QueryParserTokenManager implements QueryParserConstants public class QueryParserTokenManager implements QueryParserConstants
{ {

View File

@@ -18,7 +18,6 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.Queue; import java.util.Queue;
import java.util.Stack;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;

View File

@@ -22,7 +22,6 @@ import java.io.Reader;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/** /**
* Simple tokeniser for floats. * Simple tokeniser for floats.

View File

@@ -22,7 +22,6 @@ import java.io.Reader;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/** /**
* Simple tokeniser for integers. * Simple tokeniser for integers.

View File

@@ -19,11 +19,9 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.alfresco.error.AlfrescoRuntimeException;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/** /**
* Simple tokeniser for longs. * Simple tokeniser for longs.

View File

@@ -0,0 +1,9 @@
package org.alfresco.repo.search.impl.lucene.analysis;
public class LowerCaseVerbatimAnalyser extends VerbatimAnalyser
{
public LowerCaseVerbatimAnalyser()
{
super(true);
}
}

View File

@@ -7,7 +7,7 @@ import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import org.alfresco.i18n.I18NUtil; import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.impl.lucene.LuceneQueryParser; import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@@ -21,10 +21,13 @@ public class MLAnalayser extends Analyzer
private DictionaryService dictionaryService; private DictionaryService dictionaryService;
private HashMap<Locale, Analyzer> analysers = new HashMap<Locale, Analyzer>(); private HashMap<Locale, Analyzer> analysers = new HashMap<Locale, Analyzer>();
private MLAnalysisMode mlAnalaysisMode;
public MLAnalayser(DictionaryService dictionaryService) public MLAnalayser(DictionaryService dictionaryService, MLAnalysisMode mlAnalaysisMode)
{ {
this.dictionaryService = dictionaryService; this.dictionaryService = dictionaryService;
this.mlAnalaysisMode = mlAnalaysisMode;
} }
@Override @Override
@@ -107,7 +110,7 @@ public class MLAnalayser extends Analyzer
} }
Locale locale = new Locale(language, country, varient); Locale locale = new Locale(language, country, varient);
// leave the reader where it is .... // leave the reader where it is ....
return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader); return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode);
} }
else else
{ {

View File

@@ -3,39 +3,36 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale; import java.util.Locale;
import org.alfresco.repo.search.MLAnalysisMode;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
/** /**
* Create duplicate tokens for multilingual varients * Create duplicate tokens for multilingual varients The forms are Tokens: Token - all languages {fr}Token - if a
* * language is specified {fr_CA}Token - if a language and country is specified {fr_CA_Varient}Token - for all three
* The forms are
*
* Tokens:
* Token - all languages
* {fr}Token - if a language is specified
* {fr_CA}Token - if a language and country is specified
* {fr_CA_Varient}Token - for all three
* {fr__Varient}Token - for a language varient with no country * {fr__Varient}Token - for a language varient with no country
* *
* @author andyh * @author andyh
*
*/ */
public class MLTokenDuplicator extends Tokenizer public class MLTokenDuplicator extends Tokenizer
{ {
private static Logger s_logger = Logger.getLogger(MLTokenDuplicator.class);
TokenStream source; TokenStream source;
Locale locale; Locale locale;
Iterator<Token> it; Iterator<Token> it;
ArrayList<String> prefixes; HashSet<String> prefixes;
public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader) public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader, MLAnalysisMode mlAnalaysisMode)
{ {
super(reader); super(reader);
this.source = source; this.source = source;
@@ -45,27 +42,92 @@ public class MLTokenDuplicator extends Tokenizer
boolean c = locale.getCountry().length() != 0; boolean c = locale.getCountry().length() != 0;
boolean v = locale.getVariant().length() != 0; boolean v = locale.getVariant().length() != 0;
prefixes = new ArrayList<String>(4); prefixes = new HashSet<String>(4);
prefixes.add(""); if (mlAnalaysisMode.includesAll())
{
prefixes.add("");
}
if (l) if (mlAnalaysisMode.includesExact())
{ {
StringBuffer result = new StringBuffer(); StringBuffer result = new StringBuffer();
result.append("{").append(locale.getLanguage()).append("}"); result.append("{").append(locale.toString()).append("}");
prefixes.add(result.toString()); prefixes.add(result.toString());
result.deleteCharAt(result.length()-1); }
if (c || (l && v)) if (mlAnalaysisMode.includesContaining())
{
if (v)
{ {
result.append('_').append(locale.getCountry()).append("}"); Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), "");
StringBuffer result = new StringBuffer();
result.append("{").append(noVarient.toString()).append("}");
prefixes.add(result.toString()); prefixes.add(result.toString());
result.deleteCharAt(result.length()-1);
} Locale noCountry = new Locale(locale.getLanguage(), "", "");
if (v && (l || c)) result = new StringBuffer();
{ result.append("{").append(noCountry.toString()).append("}");
result.append('_').append(locale.getVariant()).append("}");
prefixes.add(result.toString()); prefixes.add(result.toString());
} }
if (c)
{
Locale noCountry = new Locale(locale.getLanguage(), "", "");
StringBuffer result = new StringBuffer();
result.append("{").append(noCountry.toString()).append("}");
prefixes.add(result.toString());
}
}
if (mlAnalaysisMode.includesContained())
{
// varients have not contained
if (!v)
{
if (!c)
{
if (!l)
{
// All
for (Locale toAdd : Locale.getAvailableLocales())
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
else
{
// All that match language
for (Locale toAdd : Locale.getAvailableLocales())
{
if (locale.getLanguage().equals(toAdd.getLanguage()))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
}
else
{
// All that match language and country
for (Locale toAdd : Locale.getAvailableLocales())
{
if ((locale.getLanguage().equals(toAdd.getLanguage()))
&& (locale.getCountry().equals(toAdd.getCountry())))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
}
}
if(s_logger.isDebugEnabled())
{
s_logger.debug("Locale "+ locale +" using "+mlAnalaysisMode+" is "+prefixes);
} }
} }
@@ -81,7 +143,7 @@ public class MLTokenDuplicator extends Tokenizer
{ {
return null; return null;
} }
if(it.hasNext()) if (it.hasNext())
{ {
return it.next(); return it.next();
} }
@@ -99,12 +161,12 @@ public class MLTokenDuplicator extends Tokenizer
{ {
return null; return null;
} }
ArrayList<Token> tokens = new ArrayList<Token>(prefixes.size()); ArrayList<Token> tokens = new ArrayList<Token>(prefixes.size());
for(String prefix : prefixes) for (String prefix : prefixes)
{ {
Token newToken = new Token(prefix+token.termText(), token.startOffset(), token.endOffset(), token.type()); Token newToken = new Token(prefix + token.termText(), token.startOffset(), token.endOffset(), token.type());
if(tokens.size() == 0) if (tokens.size() == 0)
{ {
newToken.setPositionIncrement(token.getPositionIncrement()); newToken.setPositionIncrement(token.getPositionIncrement());
} }
@@ -118,5 +180,4 @@ public class MLTokenDuplicator extends Tokenizer
} }
} }

View File

@@ -19,11 +19,11 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import junit.framework.TestCase;
public class PathTokenFilterTest extends TestCase public class PathTokenFilterTest extends TestCase
{ {

View File

@@ -5,18 +5,23 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
public class VerbatimAnalyser public class VerbatimAnalyser extends Analyzer
extends Analyzer
{ {
boolean lowerCase;
public VerbatimAnalyser() public VerbatimAnalyser()
{
lowerCase = false;
}
public VerbatimAnalyser(boolean lowerCase)
{ {
super(); super();
this.lowerCase = lowerCase;
} }
public TokenStream tokenStream(String fieldName, Reader reader) public TokenStream tokenStream(String fieldName, Reader reader)
{ {
return new VerbatimTokenFilter(reader); return new VerbatimTokenFilter(reader, lowerCase);
} }
} }

View File

@@ -10,9 +10,12 @@ public class VerbatimTokenFilter extends Tokenizer
{ {
boolean readInput = true; boolean readInput = true;
VerbatimTokenFilter(Reader in) boolean lowerCase;
VerbatimTokenFilter(Reader in, boolean lowerCase)
{ {
super(in); super(in);
this.lowerCase = lowerCase;
} }
@Override @Override
@@ -31,6 +34,10 @@ public class VerbatimTokenFilter extends Tokenizer
} }
String token = buffer.toString(); String token = buffer.toString();
if(lowerCase)
{
token = token.toLowerCase();
}
return new Token(token, 0, token.length() - 1, "VERBATIM"); return new Token(token, 0, token.length() - 1, "VERBATIM");
} }
else else

View File

@@ -18,8 +18,10 @@ package org.alfresco.service.cmr.search;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Set;
import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.service.cmr.repository.Path; import org.alfresco.service.cmr.repository.Path;
@@ -94,6 +96,10 @@ public class SearchParameters extends SearchStatement
private PermissionEvaluationMode permissionEvaluation = PermissionEvaluationMode.EAGER; private PermissionEvaluationMode permissionEvaluation = PermissionEvaluationMode.EAGER;
private int limit = DEFAULT_LIMIT; private int limit = DEFAULT_LIMIT;
private HashSet<String> allAttributes = new HashSet<String>();
private HashSet<String> textAttributes = new HashSet<String>();
/** /**
* Default constructor * Default constructor
@@ -351,6 +357,52 @@ public class SearchParameters extends SearchStatement
{ {
return Collections.unmodifiableList(locales); return Collections.unmodifiableList(locales);
} }
/**
* Add a locale to include for multi-lingual text searches.
* If non are set, the default is to use the user's locale.
*
* @param locale
*/
public void addTextAttribute(String attribute)
{
textAttributes.add(attribute);
}
/**
* Get the locales used for multi-lingual text searches.
*
* @return
*/
public Set<String> getTextAttributes()
{
return Collections.unmodifiableSet(textAttributes);
}
/**
* Add a locale to include for multi-lingual text searches.
* If non are set, the default is to use the user's locale.
*
* @param locale
*/
public void addAllAttribute(String attribute)
{
allAttributes.add(attribute);
}
/**
* Get the locales used for multi-lingual text searches.
*
* @return
*/
public Set<String> getAllAttributes()
{
return Collections.unmodifiableSet(allAttributes);
}
/** /**
* A helper class for sort definition. Encapsulated using the lucene sortType, field name and a flag for * A helper class for sort definition. Encapsulated using the lucene sortType, field name and a flag for