diff --git a/source/java/org/alfresco/repo/search/MLAnalysisMode.java b/source/java/org/alfresco/repo/search/MLAnalysisMode.java index 9aae62f24e..cd7c32dd24 100644 --- a/source/java/org/alfresco/repo/search/MLAnalysisMode.java +++ b/source/java/org/alfresco/repo/search/MLAnalysisMode.java @@ -13,30 +13,144 @@ public enum MLAnalysisMode /** * Only exact locale is used. */ - LOCALE_ONLY, + LOCALE_ONLY + { + public boolean includesAll() + { + return false; + } + public boolean includesContained() + { + return false; + } + public boolean includesContaining() + { + return false; + } + public boolean includesExact() + { + return true; + } + + }, /** * Only the exact locale and no local === all lnaguages */ - LOCALE_AND_ALL, + LOCALE_AND_ALL + { + public boolean includesAll() + { + return true; + } + public boolean includesContained() + { + return false; + } + public boolean includesContaining() + { + return false; + } + public boolean includesExact() + { + return true; + } + }, /** * Expand the locale to include all the locales that contain it. * en_GB would be en_GB, en, but not all languages */ - LOCALE_AND_ALL_CONTAINING_LOCALES, + LOCALE_AND_ALL_CONTAINING_LOCALES + { + public boolean includesAll() + { + return false; + } + public boolean includesContained() + { + return false; + } + public boolean includesContaining() + { + return true; + } + public boolean includesExact() + { + return true; + } + }, /** * Expand the locale to include all the locales that contain it. * en_GB would be en_GB, en, and all. */ - LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, + LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL + { + public boolean includesAll() + { + return true; + } + public boolean includesContained() + { + return false; + } + public boolean includesContaining() + { + return true; + } + public boolean includesExact() + { + return true; + } + }, /** * Expand to all the locales that are contained by this. * en would expand to en, en_GB, en_US, .... 
*/ - LOCAL_AND_ALL_CONTAINED_LOCALES; + LOCALE_AND_ALL_CONTAINED_LOCALES + { + public boolean includesAll() + { + return false; + } + public boolean includesContained() + { + return true; + } + public boolean includesContaining() + { + return false; + } + public boolean includesExact() + { + return true; + } + }, + + /** + * No prefix only + */ + ALL_ONLY + { + public boolean includesAll() + { + return true; + } + public boolean includesContained() + { + return false; + } + public boolean includesContaining() + { + return false; + } + public boolean includesExact() + { + return false; + } + }; public static MLAnalysisMode getMLAnalysisMode(String mode) { @@ -49,4 +163,13 @@ public enum MLAnalysisMode } throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode); } + + public abstract boolean includesAll(); + + public abstract boolean includesContained(); + + public abstract boolean includesContaining(); + + public abstract boolean includesExact(); + } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java b/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java index 762b46b1ea..04d659a096 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/FastCharStream.java @@ -17,7 +17,8 @@ package org.alfresco.repo.search.impl.lucene; * limitations under the License. */ -import java.io.*; +import java.io.IOException; +import java.io.Reader; /** An efficient implementation of JavaCC's CharStream interface.

Note that * this does not do line-number counting, but instead keeps track of the diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java index 8aa403d7cd..5aa141b2dd 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java @@ -21,21 +21,23 @@ import java.util.HashMap; import java.util.Map; import org.alfresco.repo.search.MLAnalysisMode; +import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser; +import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser; import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser; import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser; import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser; +import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.PropertyDefinition; -import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.namespace.QName; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser; /** - * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that - * require no tokenisation. (tokenise should be set to false when adding the field to the document) + * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser + * should not have been called when indexing properties that require no tokenisation. 
(tokenise should be set to false + * when adding the field to the document) * * @author andyh */ @@ -82,7 +84,7 @@ public class LuceneAnalyser extends Analyzer // Treat multilingual as a special case. // If multilingual then we need to find the correct tokeniser. // This is done dynamically by reading a language code at the start of the reader. - if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype")) + if (fieldName.startsWith("@")) { QName propertyQName = QName.createQName(fieldName.substring(1)); PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); @@ -90,7 +92,7 @@ public class LuceneAnalyser extends Analyzer { if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)) { - MLAnalayser analyser = new MLAnalayser(dictionaryService); + MLAnalayser analyser = new MLAnalayser(dictionaryService, mlAlaysisMode); return analyser.tokenStream(fieldName, reader); } } @@ -133,17 +135,20 @@ public class LuceneAnalyser extends Analyzer { analyser = new WhitespaceAnalyzer(); } - else if (fieldName.equals("TEXT")) - { - DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT); - analyser = loadAnalyzer(dataType); - } else if (fieldName.startsWith("@")) { if (fieldName.endsWith(".mimetype")) { analyser = new VerbatimAnalyser(); } + else if (fieldName.endsWith(".size")) + { + analyser = new LongAnalyser(); + } + else if (fieldName.endsWith(".locale")) + { + analyser = new VerbatimAnalyser(true); + } else { QName propertyQName = QName.createQName(fieldName.substring(1)); @@ -153,7 +158,18 @@ public class LuceneAnalyser extends Analyzer if (propertyDef.isTokenisedInIndex()) { DataTypeDefinition dataType = propertyDef.getDataType(); - analyser = loadAnalyzer(dataType); + if (dataType.getName().equals(DataTypeDefinition.CONTENT)) + { + analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); + } + else if (dataType.getName().equals(DataTypeDefinition.TEXT)) + { + analyser = new 
MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); + } + else + { + analyser = loadAnalyzer(dataType); + } } else { @@ -208,7 +224,8 @@ public class LuceneAnalyser extends Analyzer } /** - * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc. + * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different + * languages etc. */ @Override public int getPositionIncrementGap(String fieldName) diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java index a1096f23fa..df852a0d0a 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.Set; import org.alfresco.repo.search.IndexerException; -import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.impl.lucene.index.IndexInfo; import org.alfresco.repo.search.impl.lucene.index.TransactionStatus; import org.alfresco.repo.search.impl.lucene.index.IndexInfo.LockWork; @@ -32,7 +31,6 @@ import org.apache.log4j.Logger; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Searcher; /** * Common support for abstracting the lucene indexer from its configuration and management requirements. 
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneCategoryServiceImpl.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneCategoryServiceImpl.java index a3c7e394fd..9c2fddf2c8 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneCategoryServiceImpl.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneCategoryServiceImpl.java @@ -40,7 +40,6 @@ import org.alfresco.service.cmr.search.ResultSetRow; import org.alfresco.service.namespace.NamespacePrefixResolver; import org.alfresco.service.namespace.QName; import org.alfresco.util.ISO9075; -import org.bouncycastle.crypto.paddings.ISO7816d4Padding; public class LuceneCategoryServiceImpl implements CategoryService { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java index da7e98ccee..5a37e53c6c 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java @@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche private String lockDirectory; - private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_ONLY; + private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL; - private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL; + private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL; /** * Private constructor for the singleton TODO: FIt in with IOC diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java index 9ee498b1db..ac5b7eded1 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java +++ 
b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java @@ -19,7 +19,9 @@ package org.alfresco.repo.search.impl.lucene; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.Reader; import java.io.Serializable; +import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collection; @@ -85,7 +87,6 @@ import org.apache.lucene.search.BooleanClause.Occur; * The implementation of the lucene based indexer. Supports basic transactional behaviour if used on its own. * * @author andyh - * */ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { @@ -119,9 +120,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 private long maxAtomicTransformationTime = 20; /** - * A list of all deletions we have made - at merge these deletions need to be made against the main index. - * - * TODO: Consider if this information needs to be persisted for recovery + * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: + * Consider if this information needs to be persisted for recovery */ private Set deletions = new LinkedHashSet(); @@ -140,8 +140,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 private boolean isModified = false; /** - * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non atomically indexed things from one or more other - * updates. + * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just + * fixing up non atomically indexed things from one or more other updates. 
*/ private Boolean isFTSUpdate = null; @@ -168,7 +168,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 /** * Default construction - * */ LuceneIndexerImpl2() { @@ -216,7 +215,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 /** * Utility method to check we are in the correct state to do work Also keeps track of the dirty flag. - * */ private void checkAbleToDoWork(boolean isFTS, boolean isModified) @@ -508,7 +506,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 case Status.STATUS_ACTIVE: // special case - commit from active prepare(); - // drop through to do the commit; + // drop through to do the commit; default: if (status != Status.STATUS_PREPARED) { @@ -585,37 +583,36 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 for (Helper helper : toFTSIndex) { - //BooleanQuery query = new BooleanQuery(); - //query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false); - //query.add(new TermQuery(new Term("TX", helper.tx)), true, false); - //query.add(new TermQuery(new Term("ISNODE", "T")), false, false); + // BooleanQuery query = new BooleanQuery(); + // query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false); + // query.add(new TermQuery(new Term("TX", helper.tx)), true, false); + // query.add(new TermQuery(new Term("ISNODE", "T")), false, false); deletions.add(helper.nodeRef); - - -// try -// { -// Hits hits = mainSearcher.search(query); -// if (hits.length() > 0) -// { -// for (int i = 0; i < hits.length(); i++) -// { -// mainReader.delete(hits.id(i)); -// } -// } -// else -// { -// hits = deltaSearcher.search(query); -// for (int i = 0; i < hits.length(); i++) -// { -// deltaReader.delete(hits.id(i)); -// } -// } -// } -// catch (IOException e) -// { -// throw new LuceneIndexException("Failed to delete an FTS update from the original index", e); -// } + + // try + // { + // Hits hits = 
mainSearcher.search(query); + // if (hits.length() > 0) + // { + // for (int i = 0; i < hits.length(); i++) + // { + // mainReader.delete(hits.id(i)); + // } + // } + // else + // { + // hits = deltaSearcher.search(query); + // for (int i = 0; i < hits.length(); i++) + // { + // deltaReader.delete(hits.id(i)); + // } + // } + // } + // catch (IOException e) + // { + // throw new LuceneIndexException("Failed to delete an FTS update from the original index", e); + // } } } @@ -688,11 +685,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } /** - * Prepare to commit - * - * At the moment this makes sure we have all the locks - * - * TODO: This is not doing proper serialisation against the index as would a data base transaction. + * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper + * serialisation against the index as would a data base transaction. * * @return */ @@ -766,7 +760,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 /** * Roll back the index changes (this just means they are never added) - * */ public void rollback() throws LuceneIndexException @@ -781,7 +774,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 case Status.STATUS_ROLLEDBACK: throw new IndexerException("Unable to roll back: Transaction is already rolled back"); case Status.STATUS_COMMITTING: - // Can roll back during commit + // Can roll back during commit default: status = Status.STATUS_ROLLING_BACK; // if (isModified()) @@ -807,8 +800,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } /** - * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back. - * + * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow + * roll back. 
*/ public void setRollbackOnly() @@ -1242,7 +1235,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 Document xdoc = new Document(); xdoc.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); - xdoc.add(new Field("TX", nodeStatus.getChangeTxnId(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("TX", nodeStatus.getChangeTxnId(), Field.Store.YES, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); boolean isAtomic = true; for (QName propertyName : properties.keySet()) { @@ -1298,8 +1292,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 qNameBuffer.append(";/"); } qNameBuffer.append(ISO9075.getXPathName(qNameRef.getQName())); - xdoc.add(new Field("PARENT", qNameRef.getParentRef().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); - xdoc.add(new Field("ASSOCTYPEQNAME", ISO9075.getXPathName(qNameRef.getTypeQName()), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); + xdoc.add(new Field("PARENT", qNameRef.getParentRef().toString(), Field.Store.YES, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("ASSOCTYPEQNAME", ISO9075.getXPathName(qNameRef.getTypeQName()), + Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); xdoc.add(new Field("LINKASPECT", (pair.getSecond() == null) ? 
"" : ISO9075.getXPathName(pair .getSecond()), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); } @@ -1322,17 +1318,22 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 if (directPaths.contains(pair.getFirst())) { Document directoryEntry = new Document(); - directoryEntry.add(new Field("ID", nodeRef.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); - directoryEntry.add(new Field("PATH", pathString, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); + directoryEntry.add(new Field("ID", nodeRef.toString(), Field.Store.YES, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + directoryEntry.add(new Field("PATH", pathString, Field.Store.YES, Field.Index.TOKENIZED, + Field.TermVector.NO)); for (NodeRef parent : getParents(pair.getFirst())) { - directoryEntry.add(new Field("ANCESTOR", parent.toString(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + directoryEntry.add(new Field("ANCESTOR", parent.toString(), Field.Store.NO, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); } - directoryEntry.add(new Field("ISCONTAINER", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + directoryEntry.add(new Field("ISCONTAINER", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); if (isCategory(getDictionaryService().getType(nodeService.getType(nodeRef)))) { - directoryEntry.add(new Field("ISCATEGORY", "T", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + directoryEntry.add(new Field("ISCATEGORY", "T", Field.Store.YES, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); } docs.add(directoryEntry); @@ -1350,7 +1351,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 xdoc.add(new Field("PATH", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("QNAME", "", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ISROOT", "T", 
Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); - xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(ContentModel.ASSOC_CHILDREN), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); + xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(ContentModel.ASSOC_CHILDREN), + Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); docs.add(xdoc); @@ -1358,36 +1360,45 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 else // not a root node { - xdoc.add(new Field("QNAME", qNameBuffer.toString(),Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("QNAME", qNameBuffer.toString(), Field.Store.YES, Field.Index.TOKENIZED, + Field.TermVector.NO)); // xdoc.add(new Field("PARENT", parentBuffer.toString(), true, true, // true)); ChildAssociationRef primary = nodeService.getPrimaryParent(nodeRef); - xdoc.add(new Field("PRIMARYPARENT", primary.getParentRef().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); - xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(primary.getTypeQName()), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); + xdoc.add(new Field("PRIMARYPARENT", primary.getParentRef().toString(), Field.Store.YES, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("PRIMARYASSOCTYPEQNAME", ISO9075.getXPathName(primary.getTypeQName()), Field.Store.YES, + Field.Index.NO, Field.TermVector.NO)); QName typeQName = nodeService.getType(nodeRef); - xdoc.add(new Field("TYPE", ISO9075.getXPathName(typeQName), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("TYPE", ISO9075.getXPathName(typeQName), Field.Store.YES, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); for (QName classRef : nodeService.getAspects(nodeRef)) { - xdoc.add(new Field("ASPECT", 
ISO9075.getXPathName(classRef), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("ASPECT", ISO9075.getXPathName(classRef), Field.Store.YES, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); } xdoc.add(new Field("ISROOT", "F", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); xdoc.add(new Field("ISNODE", "T", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); if (isAtomic || indexAllProperties) { - xdoc.add(new Field("FTSSTATUS", "Clean", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc + .add(new Field("FTSSTATUS", "Clean", Field.Store.NO, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); } else { if (isNew) { - xdoc.add(new Field("FTSSTATUS", "New", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("FTSSTATUS", "New", Field.Store.NO, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); } else { - xdoc.add(new Field("FTSSTATUS", "Dirty", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + xdoc.add(new Field("FTSSTATUS", "Dirty", Field.Store.NO, Field.Index.UN_TOKENIZED, + Field.TermVector.NO)); } } @@ -1446,6 +1457,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 boolean atomic = true; boolean isContent = false; boolean isMultiLingual = false; + boolean isText = false; PropertyDefinition propertyDef = getDictionaryService().getProperty(propertyName); if (propertyDef != null) @@ -1456,6 +1468,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 atomic = propertyDef.isIndexedAtomically(); isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT); isMultiLingual = propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT); + isText = propertyDef.getDataType().getName().equals(DataTypeDefinition.TEXT); } if (value == null) { @@ -1493,7 +1506,19 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 
continue; } // store mimetype in index - even if content does not index it is useful - doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + // Added szie and locale - size needs to be tokenised correctly + doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), Field.Store.NO, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); + doc.add(new Field(attributeName + ".size", Long.toString(contentData.getSize()), Field.Store.NO, + Field.Index.TOKENIZED, Field.TermVector.NO)); + // TODO: Use the node locale in preferanced to the system locale + Locale locale = contentData.getLocale(); + if (locale == null) + { + locale = Locale.getDefault(); + } + doc.add(new Field(attributeName + ".locale", locale.toString().toLowerCase(), Field.Store.NO, + Field.Index.UN_TOKENIZED, Field.TermVector.NO)); ContentReader reader = contentService.getReader(nodeRef, propertyName); if (reader != null && reader.exists()) @@ -1519,8 +1544,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 // don't index from the reader readerReady = false; // not indexed: no transformation - //doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); - doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); + // doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, + // Field.Index.TOKENIZED, Field.TermVector.NO)); + doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, + Field.Index.TOKENIZED, Field.TermVector.NO)); } else if (indexAtomicPropertiesOnly && transformer.getTransformationTime() > maxAtomicTransformationTime) @@ -1554,8 +1581,10 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 readerReady = false; // not indexed: transformation // failed - //doc.add(new 
Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); - doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); + // doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, + // Field.Index.TOKENIZED, Field.TermVector.NO)); + doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, + Field.Index.TOKENIZED, Field.TermVector.NO)); } } } @@ -1564,16 +1593,16 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 if (readerReady) { InputStreamReader isr = null; - //InputStream ris = reader.getContentInputStream(); - //try - //{ - // isr = new InputStreamReader(ris, "UTF-8"); + // InputStream ris = reader.getContentInputStream(); + // try + // { + // isr = new InputStreamReader(ris, "UTF-8"); // } - //catch (UnsupportedEncodingException e) - // { - // isr = new InputStreamReader(ris); - //} - //doc.add(new Field("TEXT", isr, Field.TermVector.NO)); + // catch (UnsupportedEncodingException e) + // { + // isr = new InputStreamReader(ris); + // } + // doc.add(new Field("TEXT", isr, Field.TermVector.NO)); InputStream ris = reader.getReader().getContentInputStream(); try @@ -1584,10 +1613,11 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { isr = new InputStreamReader(ris); } - - doc.add(new Field("@" - + QName.createQName(propertyName.getNamespaceURI(), ISO9075.encode(propertyName - .getLocalName())), isr, Field.TermVector.NO)); + StringBuilder builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000"); + StringReader prefix = new StringReader(builder.toString()); + Reader multiReader = new MultiReader(prefix, isr); + doc.add(new Field(attributeName, multiReader, Field.TermVector.NO)); } } else @@ -1601,17 +1631,19 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 + 
(reader == null ? " --- " : Boolean.toString(reader.exists()))); } // not indexed: content missing - doc.add(new Field("TEXT", NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); - doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO)); + doc.add(new Field("TEXT", NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, + Field.TermVector.NO)); + doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, + Field.Index.TOKENIZED, Field.TermVector.NO)); } } else { Field.Store fieldStore = store ? Field.Store.YES : Field.Store.NO; - Field.Index fieldIndex; - if(index ) + Field.Index fieldIndex; + if (index) { - if(tokenise) + if (tokenise) { fieldIndex = Field.Index.TOKENIZED; } @@ -1624,21 +1656,33 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { fieldIndex = Field.Index.NO; } - - if(isMultiLingual) + + if (isMultiLingual) { MLText mlText = DefaultTypeConverter.INSTANCE.convert(MLText.class, value); - for(Locale locale : mlText.getLocales()) + for (Locale locale : mlText.getLocales()) { String localeString = mlText.getValue(locale); - doc.add(new Field(attributeName, "\u0000" + locale.toString() +"\u0000" + localeString, fieldStore, fieldIndex, Field.TermVector.NO)); + StringBuilder builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, + Field.TermVector.NO)); } } + else if(isText) + { + // TODO: Use the node locale in preferanced to the system locale + Locale locale = Locale.getDefault(); + StringBuilder builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, + Field.TermVector.NO)); + } else { doc.add(new 
Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); } - + } } @@ -2000,19 +2044,14 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 public boolean getDeleteOnlyNodes() { - if(isFTSUpdate != null) - { - return isFTSUpdate.booleanValue(); - } - else - { - return false; - } + if (isFTSUpdate != null) + { + return isFTSUpdate.booleanValue(); + } + else + { + return false; + } } - - - - - } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java index 23d9d3755c..5f22803c09 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Set; import org.alfresco.i18n.I18NUtil; import org.alfresco.repo.search.SearcherException; @@ -32,6 +33,7 @@ import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.PropertyDefinition; import org.alfresco.service.cmr.dictionary.TypeDefinition; +import org.alfresco.service.cmr.search.SearchParameters; import org.alfresco.service.namespace.NamespacePrefixResolver; import org.alfresco.service.namespace.QName; import org.apache.log4j.Logger; @@ -55,7 +57,7 @@ public class LuceneQueryParser extends QueryParser private DictionaryService dictionaryService; - private List locales; + private SearchParameters searchParameters; /** * Parses a query string, returning a {@link org.apache.lucene.search.Query}. 
@@ -71,7 +73,7 @@ public class LuceneQueryParser extends QueryParser */ static public Query parse(String query, String field, Analyzer analyzer, NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, - Operator defaultOperator, List locales) throws ParseException + Operator defaultOperator, SearchParameters searchParameters) throws ParseException { if (s_logger.isDebugEnabled()) { @@ -81,14 +83,19 @@ public class LuceneQueryParser extends QueryParser parser.setDefaultOperator(defaultOperator); parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setDictionaryService(dictionaryService); - parser.setLocales(locales); + parser.setSearchParameters(searchParameters); // TODO: Apply locale contstraints at the top level if required for the non ML doc types. - return parser.parse(query); + Query result = parser.parse(query); + if (s_logger.isDebugEnabled()) + { + s_logger.debug("Query " + query + " is\n\t" + result.toString()); + } + return result; } - private void setLocales(List locales) + private void setSearchParameters(SearchParameters searchParameters) { - this.locales = locales; + this.searchParameters = searchParameters; } public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver) @@ -141,15 +148,31 @@ public class LuceneQueryParser extends QueryParser } else if (field.equals("TEXT")) { - Collection contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT); - BooleanQuery query = new BooleanQuery(); - for (QName qname : contentAttributes) + Set text = searchParameters.getTextAttributes(); + if ((text == null) || (text.size() == 0)) { - // The super implementation will create phrase queries etc if required - Query part = super.getFieldQuery("@" + qname.toString(), queryText); - query.add(part, Occur.SHOULD); + Collection contentAttributes = dictionaryService + .getAllProperties(DataTypeDefinition.CONTENT); + BooleanQuery query = new BooleanQuery(); + for (QName qname : 
contentAttributes) + { + // The super implementation will create phrase queries etc if required + Query part = getFieldQuery("@" + qname.toString(), queryText); + query.add(part, Occur.SHOULD); + } + return query; } - return query; + else + { + BooleanQuery query = new BooleanQuery(); + for (String fieldName : text) + { + Query part = getFieldQuery(fieldName, queryText); + query.add(part, Occur.SHOULD); + } + return query; + } + } else if (field.equals("ID")) { @@ -232,6 +255,39 @@ public class LuceneQueryParser extends QueryParser } return booleanQuery; } + else if (field.equals("EXACTTYPE")) + { + TypeDefinition target; + if (queryText.startsWith("{")) + { + target = dictionaryService.getType(QName.createQName(queryText)); + } + else + { + int colonPosition = queryText.indexOf(':'); + if (colonPosition == -1) + { + // use the default namespace + target = dictionaryService.getType(QName.createQName(namespacePrefixResolver + .getNamespaceURI(""), queryText)); + } + else + { + // find the prefix + target = dictionaryService.getType(QName.createQName(namespacePrefixResolver + .getNamespaceURI(queryText.substring(0, colonPosition)), queryText + .substring(colonPosition + 1))); + } + } + if (target == null) + { + throw new SearcherException("Invalid type: " + queryText); + } + QName targetQName = target.getName(); + TermQuery termQuery = new TermQuery(new Term("TYPE", targetQName.toString())); + return termQuery; + + } else if (field.equals("ASPECT")) { AspectDefinition target; @@ -281,100 +337,138 @@ public class LuceneQueryParser extends QueryParser } return booleanQuery; } - else if (field.startsWith("@")) + else if (field.equals("EXACTASPECT")) { - // Expand prefixes - - String expandedFieldName = field; - // Check for any prefixes and expand to the full uri - if (field.charAt(1) != '{') + AspectDefinition target; + if (queryText.startsWith("{")) { - int colonPosition = field.indexOf(':'); + target = dictionaryService.getAspect(QName.createQName(queryText)); + } +
else + { + int colonPosition = queryText.indexOf(':'); if (colonPosition == -1) { // use the default namespace - expandedFieldName = "@{" - + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1); + target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver + .getNamespaceURI(""), queryText)); } else { // find the prefix - expandedFieldName = "@{" - + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" - + field.substring(colonPosition + 1); + target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver + .getNamespaceURI(queryText.substring(0, colonPosition)), queryText + .substring(colonPosition + 1))); } } - // Mime type - if (expandedFieldName.endsWith(".mimetype")) + // Unknown aspect: report it the way EXACTTYPE does instead of failing with a NullPointerException + if (target == null) + { + throw new SearcherException("Invalid aspect: " + queryText); + } + QName targetQName = target.getName(); + TermQuery termQuery = new TermQuery(new Term("ASPECT", targetQName.toString())); + + return termQuery; + } + else if (field.startsWith("@")) + { + return attributeQueryBuilder(field, queryText, new FieldQuery()); + } + else if (field.equals("ALL")) + { + Set all = searchParameters.getAllAttributes(); + if ((all == null) || (all.size() == 0)) { - QName propertyQName = QName.createQName(expandedFieldName.substring(1, - expandedFieldName.length() - 9)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) - && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) + Collection contentAttributes = dictionaryService.getAllProperties(null); + BooleanQuery query = new BooleanQuery(); + for (QName qname : contentAttributes) { - return super.getFieldQuery(expandedFieldName, queryText); + // The super implementation will create phrase queries etc if required + Query part = getFieldQuery("@" + qname.toString(), queryText); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } - - } - else if (expandedFieldName.endsWith(".size")) - { - QName propertyQName = QName.createQName(expandedFieldName.substring(1,
- expandedFieldName.length() - 5)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) - && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) - { - return super.getFieldQuery(expandedFieldName, queryText); - } - - } - else if (expandedFieldName.endsWith(".locale")) - { - QName propertyQName = QName.createQName(expandedFieldName.substring(1, - expandedFieldName.length() - 7)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) - && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) - { - return super.getFieldQuery(expandedFieldName, queryText); - } - - } - - - // Already in expanded form - - // ML - - QName propertyQName = QName.createQName(expandedFieldName.substring(1)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))) - { - // Build a sub query for each locale and or the results together - the analysis will take care of - // cross language matching for each entry - BooleanQuery booleanQuery = new BooleanQuery(); - for (Locale locale : (((locales == null) || (locales.size() == 0)) ? 
Collections - .singletonList(I18NUtil.getLocale()) : locales)) - { - StringBuilder builder = new StringBuilder(queryText.length() + 10); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText); - Query subQuery = super.getFieldQuery(expandedFieldName, builder.toString()); - booleanQuery.add(subQuery, Occur.SHOULD); - } - return booleanQuery; + return query; } else { - return super.getFieldQuery(expandedFieldName, queryText); + BooleanQuery query = new BooleanQuery(); + for (String fieldName : all) + { + Query part = getFieldQuery(fieldName, queryText); + if (part != null) + { + query.add(part, Occur.SHOULD); + } + } + return query; } } + else if (field.equals("ISNULL")) + { + String qnameString = expandFieldName(queryText); + QName qname = QName.createQName(qnameString); + PropertyDefinition pd = dictionaryService.getProperty(qname); + if (pd != null) + { + QName container = pd.getContainerClass().getName(); + BooleanQuery query = new BooleanQuery(); + Query typeQuery = getFieldQuery("TYPE", container.toString()); + query.add(typeQuery, Occur.MUST); + Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*"); + query.add(presenceQuery, Occur.MUST_NOT); + return query; + } + else + { + return super.getFieldQuery(field, queryText); + } + + } + else if (field.equals("ISNOTNULL")) + { + String qnameString = expandFieldName(queryText); + QName qname = QName.createQName(qnameString); + PropertyDefinition pd = dictionaryService.getProperty(qname); + if (pd != null) + { + QName container = pd.getContainerClass().getName(); + BooleanQuery query = new BooleanQuery(); + Query typeQuery = getFieldQuery("TYPE", container.toString()); + query.add(typeQuery, Occur.MUST); + Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*"); + query.add(presenceQuery, Occur.MUST); + return query; + } + else + { + return super.getFieldQuery(field, queryText); + } + + } + else if 
(dictionaryService.getDataType(QName.createQName(expandFieldName(field))) != null) + { + Collection contentAttributes = dictionaryService.getAllProperties(dictionaryService.getDataType( + QName.createQName(expandFieldName(field))).getName()); + BooleanQuery query = new BooleanQuery(); + for (QName qname : contentAttributes) + { + // The super implementation will create phrase queries etc if required + Query part = getFieldQuery("@" + qname.toString(), queryText); + query.add(part, Occur.SHOULD); + } + return query; + } else { return super.getFieldQuery(field, queryText); } + } catch (SAXPathException e) { @@ -391,24 +480,7 @@ public class LuceneQueryParser extends QueryParser { if (field.startsWith("@")) { - String fieldName = field; - // Check for any prefixes and expand to the full uri - if (field.charAt(1) != '{') - { - int colonPosition = field.indexOf(':'); - if (colonPosition == -1) - { - // use the default namespace - fieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1); - } - else - { - // find the prefix - fieldName = "@{" - + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" - + field.substring(colonPosition + 1); - } - } + String fieldName = expandAttributeFieldName(field); return new RangeQuery(new Term(fieldName, getToken(fieldName, part1)), new Term(fieldName, getToken( fieldName, part2)), inclusive); @@ -420,6 +492,52 @@ public class LuceneQueryParser extends QueryParser } + private String expandAttributeFieldName(String field) + { + String fieldName = field; + // Check for any prefixes and expand to the full uri + if (field.charAt(1) != '{') + { + int colonPosition = field.indexOf(':'); + if (colonPosition == -1) + { + // use the default namespace + fieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1); + } + else + { + // find the prefix + fieldName = "@{" + + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" + + 
field.substring(colonPosition + 1); + } + } + return fieldName; + } + + private String expandFieldName(String field) + { + String fieldName = field; + // Check for any prefixes and expand to the full uri + if (field.charAt(0) != '{') + { + int colonPosition = field.indexOf(':'); + if (colonPosition == -1) + { + // use the default namespace + fieldName = "{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field; + } + else + { + // find the prefix + fieldName = "{" + + namespacePrefixResolver.getNamespaceURI(field.substring(0, colonPosition)) + "}" + + field.substring(colonPosition + 1); + } + } + return fieldName; + } + private String getToken(String field, String value) { TokenStream source = analyzer.tokenStream(field, new StringReader(value)); @@ -457,67 +575,8 @@ public class LuceneQueryParser extends QueryParser { if (field.startsWith("@")) { - // Expand prefixes - - String expandedFieldName = field; - // Check for any prefixes and expand to the full uri - if (field.charAt(1) != '{') - { - int colonPosition = field.indexOf(':'); - if (colonPosition == -1) - { - // use the default namespace - expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1); - } - else - { - // find the prefix - expandedFieldName = "@{" - + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" - + field.substring(colonPosition + 1); - } - } - - // Mime type - if (expandedFieldName.endsWith(".mimetype")) - { - QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) - { - return super.getPrefixQuery(expandedFieldName, termStr); - } - - } - - // Already in expanded form - - // ML - - QName propertyQName = QName.createQName(expandedFieldName.substring(1)); - PropertyDefinition 
propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))) - { - // Build a sub query for each locale and or the results together - the analysis will take care of - // cross language matching for each entry - BooleanQuery booleanQuery = new BooleanQuery(); - for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil - .getLocale()) : locales)) - { - StringBuilder builder = new StringBuilder(termStr.length() + 10); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr); - Query subQuery = super.getPrefixQuery(expandedFieldName, builder.toString()); - booleanQuery.add(subQuery, Occur.SHOULD); - } - return booleanQuery; - } - else - { - return super.getPrefixQuery(expandedFieldName, termStr); - } - + return attributeQueryBuilder(field, termStr, new PrefixQuery()); } - else if (field.equals("TEXT")) { Collection contentAttributes = dictionaryService.getAllProperties(DataTypeDefinition.CONTENT); @@ -525,15 +584,14 @@ public class LuceneQueryParser extends QueryParser for (QName qname : contentAttributes) { // The super implementation will create phrase queries etc if required - Query part = super.getPrefixQuery("@" + qname.toString(), termStr); + Query part = getPrefixQuery("@" + qname.toString(), termStr); query.add(part, Occur.SHOULD); } return query; - } else { - return super.getFieldQuery(field, termStr); + return super.getPrefixQuery(field, termStr); } } @@ -542,65 +600,7 @@ public class LuceneQueryParser extends QueryParser { if (field.startsWith("@")) { - // Expand prefixes - - String expandedFieldName = field; - // Check for any prefixes and expand to the full uri - if (field.charAt(1) != '{') - { - int colonPosition = field.indexOf(':'); - if (colonPosition == -1) - { - // use the default namespace - expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + 
field.substring(1); - } - else - { - // find the prefix - expandedFieldName = "@{" - + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" - + field.substring(colonPosition + 1); - } - } - - // Mime type - if (expandedFieldName.endsWith(".mimetype")) - { - QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) - { - return super.getWildcardQuery(expandedFieldName, termStr); - } - - } - - // Already in expanded form - - // ML - - QName propertyQName = QName.createQName(expandedFieldName.substring(1)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))) - { - // Build a sub query for each locale and or the results together - the analysis will take care of - // cross language matching for each entry - BooleanQuery booleanQuery = new BooleanQuery(); - for (Locale locale : (((locales == null) || (locales.size() == 0)) ? 
Collections.singletonList(I18NUtil - .getLocale()) : locales)) - { - StringBuilder builder = new StringBuilder(termStr.length() + 10); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr); - Query subQuery = super.getWildcardQuery(expandedFieldName, builder.toString()); - booleanQuery.add(subQuery, Occur.SHOULD); - } - return booleanQuery; - } - else - { - return super.getWildcardQuery(expandedFieldName, termStr); - } - + return attributeQueryBuilder(field, termStr, new WildcardQuery()); } else if (field.equals("TEXT")) @@ -610,11 +610,10 @@ public class LuceneQueryParser extends QueryParser for (QName qname : contentAttributes) { // The super implementation will create phrase queries etc if required - Query part = super.getWildcardQuery("@" + qname.toString(), termStr); + Query part = getWildcardQuery("@" + qname.toString(), termStr); query.add(part, Occur.SHOULD); } return query; - } else { @@ -627,65 +626,7 @@ public class LuceneQueryParser extends QueryParser { if (field.startsWith("@")) { - // Expand prefixes - - String expandedFieldName = field; - // Check for any prefixes and expand to the full uri - if (field.charAt(1) != '{') - { - int colonPosition = field.indexOf(':'); - if (colonPosition == -1) - { - // use the default namespace - expandedFieldName = "@{" + namespacePrefixResolver.getNamespaceURI("") + "}" + field.substring(1); - } - else - { - // find the prefix - expandedFieldName = "@{" - + namespacePrefixResolver.getNamespaceURI(field.substring(1, colonPosition)) + "}" - + field.substring(colonPosition + 1); - } - } - - // Mime type - if (expandedFieldName.endsWith(".mimetype")) - { - QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) - { - return 
super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity); - } - - } - - // Already in expanded form - - // ML - - QName propertyQName = QName.createQName(expandedFieldName.substring(1)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))) - { - // Build a sub query for each locale and or the results together - the analysis will take care of - // cross language matching for each entry - BooleanQuery booleanQuery = new BooleanQuery(); - for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil - .getLocale()) : locales)) - { - StringBuilder builder = new StringBuilder(termStr.length() + 10); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(termStr); - Query subQuery = super.getFuzzyQuery(expandedFieldName, builder.toString(), minSimilarity); - booleanQuery.add(subQuery, Occur.SHOULD); - } - return booleanQuery; - } - else - { - return super.getFuzzyQuery(expandedFieldName, termStr, minSimilarity); - } - + return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity)); } else if (field.equals("TEXT")) @@ -695,11 +636,10 @@ public class LuceneQueryParser extends QueryParser for (QName qname : contentAttributes) { // The super implementation will create phrase queries etc if required - Query part = super.getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity); + Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity); query.add(part, Occur.SHOULD); } return query; - } else { @@ -712,4 +652,155 @@ public class LuceneQueryParser extends QueryParser this.dictionaryService = dictionaryService; } + public Query getSuperFieldQuery(String field, String queryText) throws ParseException + { + return super.getFieldQuery(field, queryText); + } + + public Query getSuperFuzzyQuery(String field, String termStr, float minSimilarity) 
throws ParseException + { + return super.getFuzzyQuery(field, termStr, minSimilarity); + } + + public Query getSuperPrefixQuery(String field, String termStr) throws ParseException + { + return super.getPrefixQuery(field, termStr); + } + + public Query getSuperWildcardQuery(String field, String termStr) throws ParseException + { + return super.getWildcardQuery(field, termStr); + } + + interface SubQuery + { + Query getQuery(String field, String queryText) throws ParseException; + } + + class FieldQuery implements SubQuery + { + public Query getQuery(String field, String queryText) throws ParseException + { + return getSuperFieldQuery(field, queryText); + } + } + + class FuzzyQuery implements SubQuery + { + float minSimilarity; + + FuzzyQuery(float minSimilarity) + { + this.minSimilarity = minSimilarity; + } + + public Query getQuery(String field, String termStr) throws ParseException + { + return getSuperFuzzyQuery(field, termStr, minSimilarity); + } + } + + class PrefixQuery implements SubQuery + { + public Query getQuery(String field, String termStr) throws ParseException + { + return getSuperPrefixQuery(field, termStr); + } + } + + class WildcardQuery implements SubQuery + { + public Query getQuery(String field, String termStr) throws ParseException + { + return getSuperWildcardQuery(field, termStr); + } + } + + private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder) throws ParseException + { + // Expand prefixes + + String expandedFieldName = expandAttributeFieldName(field); + + // Mime type + if (expandedFieldName.endsWith(".mimetype")) + { + QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 9)); + PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); + if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) + { + return subQueryBuilder.getQuery(expandedFieldName, queryText); + } + + } + else if 
(expandedFieldName.endsWith(".size")) + { + QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 5)); + PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); + if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) + { + return subQueryBuilder.getQuery(expandedFieldName, queryText); + } + + } + else if (expandedFieldName.endsWith(".locale")) + { + QName propertyQName = QName.createQName(expandedFieldName.substring(1, expandedFieldName.length() - 7)); + PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); + if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) + { + return subQueryBuilder.getQuery(expandedFieldName, queryText); + } + + } + + // Already in expanded form + + // ML + + QName propertyQName = QName.createQName(expandedFieldName.substring(1)); + PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); + if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))) + { + // Build a sub query for each locale and or the results together - the analysis will take care of + // cross language matching for each entry + BooleanQuery booleanQuery = new BooleanQuery(); + List locales = searchParameters.getLocales(); + for (Locale locale : (((locales == null) || (locales.size() == 0)) ? 
Collections.singletonList(I18NUtil + .getLocale()) : locales)) + { + StringBuilder builder = new StringBuilder(queryText.length() + 10); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText); + Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString()); + booleanQuery.add(subQuery, Occur.SHOULD); + } + return booleanQuery; + } + // Content + else if ((propertyDef != null) && (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT))) + { + // Build a sub query for each locale and or the results together - + // - add an explicit condition for the locale + BooleanQuery booleanQuery = new BooleanQuery(); + List locales = searchParameters.getLocales(); + for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil + .getLocale()) : locales)) + { + BooleanQuery subQuery = new BooleanQuery(); + Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText); + subQuery.add(contentQuery, Occur.MUST); + StringBuilder builder = new StringBuilder(); + builder.append(expandedFieldName).append(".locale"); + Query localeQuery = getFieldQuery(builder.toString(), locale.toString()); + subQuery.add(localeQuery, Occur.MUST); + booleanQuery.add(subQuery, Occur.SHOULD); + } + return booleanQuery; + } + else + { + return subQueryBuilder.getQuery(expandedFieldName, queryText); + } + } + } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java index 6941bffed5..3d4c7be65e 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java @@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2 } Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( - dictionaryService, 
searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters.getLocales()); + dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters); ClosingIndexSearcher searcher = getSearcher(indexer); if (searcher == null) { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java index b2734713f6..858cf16be1 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java @@ -42,6 +42,7 @@ import org.alfresco.repo.dictionary.DictionaryNamespaceComponent; import org.alfresco.repo.dictionary.M2Model; import org.alfresco.repo.dictionary.NamespaceDAOImpl; import org.alfresco.repo.node.BaseNodeServiceTest; +import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.QueryParameterDefImpl; import org.alfresco.repo.search.QueryRegisterComponent; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; @@ -1895,6 +1896,16 @@ public class LuceneTest2 extends TestCase + testType.toPrefixString(namespacePrefixResolver) + "\"", null, null); assertEquals(1, results.length()); results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + testType.toString() + "\"", null, + null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + + testType.toPrefixString(namespacePrefixResolver) + "\"", null, null); + assertEquals(1, results.length()); + results.close(); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" 
+ testSuperType.toString() + "\"", null, null); @@ -1905,6 +1916,16 @@ public class LuceneTest2 extends TestCase + testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null); assertEquals(13, results.length()); results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + testSuperType.toString() + "\"", + null, null); + assertEquals(12, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "EXACTTYPE:\"" + + testSuperType.toPrefixString(namespacePrefixResolver) + "\"", null, null); + assertEquals(12, results.length()); + results.close(); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\"" + testAspect.toString() + "\"", null, null); @@ -1926,6 +1947,28 @@ public class LuceneTest2 extends TestCase assertEquals(1, results.length()); results.close(); + // Test for AR-384 + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox AND TYPE:\"" + + ContentModel.PROP_CONTENT.toString() + "\"", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo AND TYPE:\"" + + ContentModel.PROP_CONTENT.toString() + "\"", null, null); + assertEquals(0, results.length()); + results.close(); + + // Test stop words are equivalent + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over a lazy\"", null, null); + assertEquals(1, results.length()); + results.close(); + // FTS test results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"fox\"", null, null); @@ -1943,42 +1986,167 @@ public class LuceneTest2 extends TestCase assertEquals(1, results.length()); results.close(); + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + 
LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:\"en_GB\"", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:en_*", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".locale:e*_GB", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"90\"", null, null); + assertEquals(1, results.length()); + results.close(); + QName queryQName = QName.createQName("alf:test1", namespacePrefixResolver); results = searcher.query(rootNodeRef.getStoreRef(), queryQName, null); assertEquals(1, results.length()); results.close(); - // Direct ML tests - - QName mlQName = QName.createQName(TEST_NAMESPACE, "ml"); + // Configuration of TEXT SearchParameters sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\""); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("TEXT:\"fox\""); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("TEXT:\"fox\""); + sp.addTextAttribute("@"+ContentModel.PROP_NAME.toString()); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + + 
sp.addTextAttribute("@"+ContentModel.PROP_CONTENT.toString()); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + // ALL and its configuration + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("ALL:\"fox\""); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("ALL:\"fox\""); + sp.addAllAttribute("@"+ContentModel.PROP_NAME.toString()); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + + sp.addAllAttribute("@"+ContentModel.PROP_CONTENT.toString()); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("ALL:\"5.6\""); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + // Search by data type + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("d\\:double:\"5.6\""); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("d\\:content:\"fox\""); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + // Direct ML tests + + QName mlQName = QName.createQName(TEST_NAMESPACE, "ml"); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - sp.addLocale(Locale.UK); - results = 
searcher.query(sp); - assertEquals(1, results.length()); - results.close(); - sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); - sp.addLocale(Locale.ENGLISH); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); + sp.addLocale(Locale.UK); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); + sp.setMlAnalaysisMode(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES); + sp.addLocale(Locale.UK); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); + sp.addLocale(Locale.ENGLISH); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane"); - sp.addLocale(Locale.FRENCH); + sp.addLocale(Locale.FRENCH); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); @@ -1987,79 +2155,98 @@ public class LuceneTest2 extends TestCase sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":香蕉"); - sp.addLocale(Locale.CHINESE); + sp.addLocale(Locale.CHINESE); results = searcher.query(sp); assertEquals(1, results.length()); 
results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banaan"); - sp.addLocale(new Locale("nl")); + sp.addLocale(new Locale("nl")); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banane"); - sp.addLocale(Locale.GERMAN); + sp.addLocale(Locale.GERMAN); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":μπανάνα"); - sp.addLocale(new Locale("el")); + sp.addLocale(new Locale("el")); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); - sp.addLocale(Locale.ITALIAN); + sp.addLocale(Locale.ITALIAN); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":バナナ"); - sp.addLocale(new Locale("ja")); + sp.addLocale(new Locale("ja")); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":바나나"); - sp.addLocale(new Locale("ko")); + sp.addLocale(new Locale("ko")); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); 
sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":banana"); - sp.addLocale(new Locale("pt")); + sp.addLocale(new Locale("pt")); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":банан"); - sp.addLocale(new Locale("ru")); + sp.addLocale(new Locale("ru")); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":plátano"); + sp.addLocale(new Locale("es")); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); + + // Test ISNULL/ISNOTNULL + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\""); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); @@ -2067,18 +2254,35 @@ public class LuceneTest2 extends TestCase sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); - sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":plátano"); - sp.addLocale(new Locale("es")); + sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\""); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("ISNOTNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\""); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + + sp = new 
SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("ISNOTNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\""); results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - // Test non field queries + + // Test non field queries + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fox", null, null); assertEquals(1, results.length()); results.close(); - + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:fo*", null, null); assertEquals(1, results.length()); results.close(); @@ -2090,50 +2294,50 @@ public class LuceneTest2 extends TestCase results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*ox", null, null); assertEquals(1, results.length()); results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fox", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":fo*", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":f*x", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) +":*ox", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fox", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + 
LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":fo*", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":f*x", null, null); - assertEquals(1, results.length()); - results.close(); - - results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) +":*ox", null, null); - assertEquals(1, results.length()); - results.close(); - - - + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":fox", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":fo*", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":f*x", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":*ox", null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":fox", + null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":fo*", + null, null); + assertEquals(1, 
results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":f*x", + null, null); + assertEquals(1, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toPrefixString(namespacePrefixResolver)) + ":*ox", + null, null); + assertEquals(1, results.length()); + results.close(); // Parameters @@ -3661,6 +3865,7 @@ public class LuceneTest2 extends TestCase DynamicNamespacePrefixResolver nspr = new DynamicNamespacePrefixResolver(null); nspr.registerNamespace(NamespaceService.ALFRESCO_PREFIX, NamespaceService.ALFRESCO_URI); nspr.registerNamespace(NamespaceService.CONTENT_MODEL_PREFIX, NamespaceService.CONTENT_MODEL_1_0_URI); + nspr.registerNamespace(NamespaceService.DICTIONARY_MODEL_PREFIX, NamespaceService.DICTIONARY_MODEL_1_0_URI); nspr.registerNamespace("namespace", "namespace"); nspr.registerNamespace("test", TEST_NAMESPACE); nspr.registerNamespace(NamespaceService.DEFAULT_PREFIX, defaultURI); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml index 5145995dfd..1a99a6a459 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml @@ -238,6 +238,26 @@ true true + + + d:text + false + false + + true + true + true + + + + d:path + false + false + + true + true + true + diff --git a/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java index 51430152c4..e606e67f0b 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java +++ 
b/source/java/org/alfresco/repo/search/impl/lucene/MultiFieldQueryParser.java @@ -16,6 +16,8 @@ package org.alfresco.repo.search.impl.lucene; * limitations under the License. */ +import java.util.Vector; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -23,8 +25,6 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; -import java.util.Vector; - /** * A QueryParser which constructs queries to search multiple fields. * diff --git a/source/java/org/alfresco/repo/search/impl/lucene/MultiReader.java b/source/java/org/alfresco/repo/search/impl/lucene/MultiReader.java new file mode 100644 index 0000000000..8a753a388c --- /dev/null +++ b/source/java/org/alfresco/repo/search/impl/lucene/MultiReader.java @@ -0,0 +1,93 @@ +/** + * + */ +package org.alfresco.repo.search.impl.lucene; + +import java.io.IOException; +import java.io.Reader; + +class MultiReader extends Reader +{ + Reader first; + + Reader second; + + boolean firstActive = true; + + MultiReader(Reader first, Reader second) + { + this.first = first; + this.second = second; + } + + @Override + public void close() throws IOException + { + IOException ioe = null; + try + { + first.close(); + } + catch (IOException e) + { + ioe = e; + } + + second.close(); + if (ioe != null) + { + throw ioe; + } + + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException + { + synchronized (lock) + { + if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0)) + { + throw new IndexOutOfBoundsException(); + } + else if (len == 0) + { + return 0; + } + for(int i = 0; i < len; i++) + { + int c; + if(firstActive) + { + c = first.read(); + if(c == -1) + { + firstActive = false; + c = second.read(); + } + } + else + { + c = second.read(); + } + if(c == -1) + { + if(i == 0) + { + return -1; + } + 
else + { + return i; + } + } + else + { + cbuf[off+i] = (char)c; + } + } + return len; + } + } + +} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/MultiReaderTest.java b/source/java/org/alfresco/repo/search/impl/lucene/MultiReaderTest.java new file mode 100644 index 0000000000..cfe9f31ffb --- /dev/null +++ b/source/java/org/alfresco/repo/search/impl/lucene/MultiReaderTest.java @@ -0,0 +1,85 @@ +package org.alfresco.repo.search.impl.lucene; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; + +import junit.framework.TestCase; + +public class MultiReaderTest extends TestCase +{ + + public MultiReaderTest() + { + super(); + } + + public MultiReaderTest(String arg0) + { + super(arg0); + } + + public void testMultiReader_single() throws IOException + { + String first = "my first string"; + String second = "another little string"; + + StringReader one = new StringReader(first); + StringReader two = new StringReader(second); + + Reader multiReader = new MultiReader(one, two); + StringBuilder builder = new StringBuilder(); + int c; + while ((c = multiReader.read()) != -1) + { + builder.append((char) c); + } + assertEquals(builder.toString(), first + second); + + } + + public void testMultiReader_bits() throws IOException + { + String first = "my first string"; + String second = "another little string"; + + StringReader one = new StringReader(first); + StringReader two = new StringReader(second); + + Reader multiReader = new MultiReader(one, two); + StringBuilder builder = new StringBuilder(); + for (int chunk = 1; chunk < 100; chunk++) + { + char[] c = new char[chunk]; + int i = 0; + while (i != -1) + { + i = multiReader.read(c); + for (int j = 0; j < i; j++) + { + builder.append(c[j]); + } + } + assertEquals(builder.toString(), first + second); + } + } + + public void testSkip() throws IOException + { + String first = "my first string"; + String second = "another little string"; + + StringReader 
one = new StringReader(first); + StringReader two = new StringReader(second); + + Reader multiReader = new MultiReader(one, two); + + multiReader.skip(3); + String all = first + second; + assertEquals((char)multiReader.read(), all.charAt(3)); + + multiReader.skip(15); + assertEquals((char)multiReader.read(), all.charAt(3+15+1)); + } + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java index 96d64e77d8..97b77b2bf4 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/QueryParser.java @@ -1,14 +1,31 @@ /* Generated By:JavaCC: Do not edit this line. QueryParser.java */ package org.alfresco.repo.search.impl.lucene; +import java.io.IOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; +import java.util.Locale; import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.DateField; +import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; /** diff --git 
a/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java b/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java index 5421c13889..6ded453f28 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/QueryParserTokenManager.java @@ -1,14 +1,5 @@ /* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */ package org.alfresco.repo.search.impl.lucene; -import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; -import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; -import org.apache.lucene.util.Parameter; public class QueryParserTokenManager implements QueryParserConstants { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java index 6006e4442c..4700ab226d 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java @@ -18,7 +18,6 @@ package org.alfresco.repo.search.impl.lucene.analysis; import java.util.LinkedList; import java.util.Queue; -import java.util.Stack; import java.util.StringTokenizer; import org.apache.lucene.analysis.TokenFilter; diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java index 31d2e907f3..b69c935595 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java @@ -22,7 +22,6 @@ import java.io.Reader; import org.apache.lucene.analysis.Token; import 
org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.standard.StandardTokenizer; /** * Simple tokeniser for floats. diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java index f1bf3213af..dee14d60fa 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java @@ -22,7 +22,6 @@ import java.io.Reader; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.standard.StandardTokenizer; /** * Simple tokeniser for integers. diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java index 5f11883170..f2a64fb939 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java @@ -19,11 +19,9 @@ package org.alfresco.repo.search.impl.lucene.analysis; import java.io.IOException; import java.io.Reader; -import org.alfresco.error.AlfrescoRuntimeException; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.standard.StandardTokenizer; /** * Simple tokeniser for longs. 
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java new file mode 100644 index 0000000000..c15b62f670 --- /dev/null +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java @@ -0,0 +1,9 @@ +package org.alfresco.repo.search.impl.lucene.analysis; + +public class LowerCaseVerbatimAnalyser extends VerbatimAnalyser +{ + public LowerCaseVerbatimAnalyser() + { + super(true); + } +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java index 94e871f682..6006b9bce5 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java @@ -7,7 +7,7 @@ import java.util.HashMap; import java.util.Locale; import org.alfresco.i18n.I18NUtil; -import org.alfresco.repo.search.impl.lucene.LuceneQueryParser; +import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.apache.log4j.Logger; @@ -21,10 +21,13 @@ public class MLAnalayser extends Analyzer private DictionaryService dictionaryService; private HashMap analysers = new HashMap(); + + private MLAnalysisMode mlAnalaysisMode; - public MLAnalayser(DictionaryService dictionaryService) + public MLAnalayser(DictionaryService dictionaryService, MLAnalysisMode mlAnalaysisMode) { this.dictionaryService = dictionaryService; + this.mlAnalaysisMode = mlAnalaysisMode; } @Override @@ -107,7 +110,7 @@ public class MLAnalayser extends Analyzer } Locale locale = new Locale(language, country, varient); // leave the reader where it is .... 
- return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader); + return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode); } else { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java index df4f867158..2c7f4038ee 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java @@ -3,39 +3,36 @@ package org.alfresco.repo.search.impl.lucene.analysis; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.Locale; +import org.alfresco.repo.search.MLAnalysisMode; +import org.apache.log4j.Logger; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; /** - * Create duplicate tokens for multilingual varients - * - * The forms are - * - * Tokens: - * Token - all languages - * {fr}Token - if a language is specified - * {fr_CA}Token - if a language and country is specified - * {fr_CA_Varient}Token - for all three + * Create duplicate tokens for multilingual varients The forms are Tokens: Token - all languages {fr}Token - if a + * language is specified {fr_CA}Token - if a language and country is specified {fr_CA_Varient}Token - for all three * {fr__Varient}Token - for a language varient with no country * * @author andyh - * */ public class MLTokenDuplicator extends Tokenizer { + private static Logger s_logger = Logger.getLogger(MLTokenDuplicator.class); + TokenStream source; Locale locale; Iterator it; - ArrayList prefixes; + HashSet prefixes; - public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader) + public MLTokenDuplicator(TokenStream source, Locale 
locale, Reader reader, MLAnalysisMode mlAnalaysisMode) { super(reader); this.source = source; @@ -45,27 +42,92 @@ public class MLTokenDuplicator extends Tokenizer boolean c = locale.getCountry().length() != 0; boolean v = locale.getVariant().length() != 0; - prefixes = new ArrayList(4); - prefixes.add(""); + prefixes = new HashSet(4); + if (mlAnalaysisMode.includesAll()) + { + prefixes.add(""); + } - if (l) + if (mlAnalaysisMode.includesExact()) { StringBuffer result = new StringBuffer(); - result.append("{").append(locale.getLanguage()).append("}"); + result.append("{").append(locale.toString()).append("}"); prefixes.add(result.toString()); - result.deleteCharAt(result.length()-1); - - if (c || (l && v)) + } + + if (mlAnalaysisMode.includesContaining()) + { + if (v) { - result.append('_').append(locale.getCountry()).append("}"); + Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), ""); + StringBuffer result = new StringBuffer(); + result.append("{").append(noVarient.toString()).append("}"); prefixes.add(result.toString()); - result.deleteCharAt(result.length()-1); - } - if (v && (l || c)) - { - result.append('_').append(locale.getVariant()).append("}"); + + Locale noCountry = new Locale(locale.getLanguage(), "", ""); + result = new StringBuffer(); + result.append("{").append(noCountry.toString()).append("}"); prefixes.add(result.toString()); } + if (c) + { + Locale noCountry = new Locale(locale.getLanguage(), "", ""); + StringBuffer result = new StringBuffer(); + result.append("{").append(noCountry.toString()).append("}"); + prefixes.add(result.toString()); + } + } + + if (mlAnalaysisMode.includesContained()) + { + // varients have not contained + if (!v) + { + if (!c) + { + if (!l) + { + // All + for (Locale toAdd : Locale.getAvailableLocales()) + { + StringBuffer result = new StringBuffer(); + result.append("{").append(toAdd.toString()).append("}"); + prefixes.add(result.toString()); + } + } + else + { + // All that match language + for 
(Locale toAdd : Locale.getAvailableLocales()) + { + if (locale.getLanguage().equals(toAdd.getLanguage())) + { + StringBuffer result = new StringBuffer(); + result.append("{").append(toAdd.toString()).append("}"); + prefixes.add(result.toString()); + } + } + } + } + else + { + // All that match language and country + for (Locale toAdd : Locale.getAvailableLocales()) + { + if ((locale.getLanguage().equals(toAdd.getLanguage())) + && (locale.getCountry().equals(toAdd.getCountry()))) + { + StringBuffer result = new StringBuffer(); + result.append("{").append(toAdd.toString()).append("}"); + prefixes.add(result.toString()); + } + } + } + } + } + if(s_logger.isDebugEnabled()) + { + s_logger.debug("Locale "+ locale +" using "+mlAnalaysisMode+" is "+prefixes); } } @@ -81,7 +143,7 @@ public class MLTokenDuplicator extends Tokenizer { return null; } - if(it.hasNext()) + if (it.hasNext()) { return it.next(); } @@ -99,12 +161,12 @@ public class MLTokenDuplicator extends Tokenizer { return null; } - + ArrayList tokens = new ArrayList(prefixes.size()); - for(String prefix : prefixes) + for (String prefix : prefixes) { - Token newToken = new Token(prefix+token.termText(), token.startOffset(), token.endOffset(), token.type()); - if(tokens.size() == 0) + Token newToken = new Token(prefix + token.termText(), token.startOffset(), token.endOffset(), token.type()); + if (tokens.size() == 0) { newToken.setPositionIncrement(token.getPositionIncrement()); } @@ -118,5 +180,4 @@ public class MLTokenDuplicator extends Tokenizer } - } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java index 27c64246d1..52f15e158f 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java @@ -19,11 +19,11 @@ package 
org.alfresco.repo.search.impl.lucene.analysis; import java.io.IOException; import java.io.StringReader; +import junit.framework.TestCase; + import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import junit.framework.TestCase; - public class PathTokenFilterTest extends TestCase { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java index 817113327f..d39fec920f 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java @@ -5,18 +5,23 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -public class VerbatimAnalyser - -extends Analyzer +public class VerbatimAnalyser extends Analyzer { - + boolean lowerCase; + public VerbatimAnalyser() + { + lowerCase = false; + } + + public VerbatimAnalyser(boolean lowerCase) { super(); + this.lowerCase = lowerCase; } public TokenStream tokenStream(String fieldName, Reader reader) { - return new VerbatimTokenFilter(reader); + return new VerbatimTokenFilter(reader, lowerCase); } } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java index b77cee9498..579a98bf3b 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java @@ -10,9 +10,12 @@ public class VerbatimTokenFilter extends Tokenizer { boolean readInput = true; - VerbatimTokenFilter(Reader in) + boolean lowerCase; + + VerbatimTokenFilter(Reader in, boolean lowerCase) { super(in); + this.lowerCase = lowerCase; } @Override @@ -31,6 +34,10 @@ public class VerbatimTokenFilter extends Tokenizer } 
String token = buffer.toString(); + if(lowerCase) + { + token = token.toLowerCase(); + } return new Token(token, 0, token.length() - 1, "VERBATIM"); } else diff --git a/source/java/org/alfresco/service/cmr/search/SearchParameters.java b/source/java/org/alfresco/service/cmr/search/SearchParameters.java index 164810f7ae..bc82852544 100644 --- a/source/java/org/alfresco/service/cmr/search/SearchParameters.java +++ b/source/java/org/alfresco/service/cmr/search/SearchParameters.java @@ -18,8 +18,10 @@ package org.alfresco.service.cmr.search; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Set; import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.service.cmr.repository.Path; @@ -94,6 +96,10 @@ public class SearchParameters extends SearchStatement private PermissionEvaluationMode permissionEvaluation = PermissionEvaluationMode.EAGER; private int limit = DEFAULT_LIMIT; + + private HashSet allAttributes = new HashSet(); + + private HashSet textAttributes = new HashSet(); /** * Default constructor @@ -351,6 +357,52 @@ public class SearchParameters extends SearchStatement { return Collections.unmodifiableList(locales); } + + + + + /** + * Add a locale to include for multi-lingual text searches. + * If non are set, the default is to use the user's locale. + * + * @param locale + */ + public void addTextAttribute(String attribute) + { + textAttributes.add(attribute); + } + + /** + * Get the locales used for multi-lingual text searches. + * + * @return + */ + public Set getTextAttributes() + { + return Collections.unmodifiableSet(textAttributes); + } + + /** + * Add a locale to include for multi-lingual text searches. + * If non are set, the default is to use the user's locale. + * + * @param locale + */ + public void addAllAttribute(String attribute) + { + allAttributes.add(attribute); + } + + /** + * Get the locales used for multi-lingual text searches. 
+ * + * @return + */ + public Set getAllAttributes() + { + return Collections.unmodifiableSet(allAttributes); + } + /** * A helper class for sort definition. Encapsulated using the lucene sortType, field name and a flag for