Split out the lucene query parser from the core repository.

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20978 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2010-07-07 12:31:10 +00:00
parent 8105f39e33
commit 839f421e5f
28 changed files with 4 additions and 8534 deletions

View File

@@ -80,7 +80,7 @@ public class DirectProperty extends AbstractSimpleProperty
// find best field match // find best field match
HashSet<String> allowableLocales = new HashSet<String>(); HashSet<String> allowableLocales = new HashSet<String>();
MLAnalysisMode analysisMode = lqp.getConfig().getDefaultMLSearchAnalysisMode(); MLAnalysisMode analysisMode = lqp.getDefaultSearchMLAnalysisMode();
for (Locale l : MLAnalysisMode.getLocales(analysisMode, sortLocale, false)) for (Locale l : MLAnalysisMode.getLocales(analysisMode, sortLocale, false))
{ {
allowableLocales.add(l.toString()); allowableLocales.add(l.toString());

View File

@@ -286,7 +286,7 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS
ClosingIndexSearcher searcher = getSearcher(indexer); ClosingIndexSearcher searcher = getSearcher(indexer);
Query query = LuceneQueryParser.parse(parameterisedQueryString, searchParameters.getDefaultFieldName(), new LuceneAnalyser(getDictionaryService(), Query query = LuceneQueryParser.parse(parameterisedQueryString, searchParameters.getDefaultFieldName(), new LuceneAnalyser(getDictionaryService(),
searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()),
namespacePrefixResolver, getDictionaryService(), tenantService, defaultOperator, searchParameters, getLuceneConfig(), searcher.getIndexReader()); namespacePrefixResolver, getDictionaryService(), tenantService, defaultOperator, searchParameters, getLuceneConfig().getDefaultMLSearchAnalysisMode(), searcher.getIndexReader());
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
{ {
s_logger.debug("Query is " + query.toString()); s_logger.debug("Query is " + query.toString());

View File

@@ -1,45 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene;
/**
 * The functions that may be applied to a lucene field when matching.
 *
 * UPPER and LOWER give a case insensitive match for untokenised fields; a tokenised
 * field is expected to match case insensitively already.
 *
 * @author andyh
 */
public enum LuceneFunction
{
    /** Match as though the field value had been converted to upper case. */
    UPPER,

    /** Match as though the field value had been converted to lower case. */
    LOWER,

    /** An ordinary lucene field match with no function applied. */
    FIELD
}

View File

@@ -1,515 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene;
import java.util.ArrayList;
import org.alfresco.repo.search.impl.lucene.analysis.PathTokenFilter;
import org.alfresco.repo.search.impl.lucene.query.AbsoluteStructuredFieldPosition;
import org.alfresco.repo.search.impl.lucene.query.DescendantAndSelfStructuredFieldPosition;
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
import org.alfresco.repo.search.impl.lucene.query.RelativeStructuredFieldPosition;
import org.alfresco.repo.search.impl.lucene.query.SelfAxisStructuredFieldPosition;
import org.alfresco.repo.search.impl.lucene.query.StructuredFieldPosition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.saxpath.Axis;
import org.saxpath.Operator;
import org.saxpath.SAXPathException;
import org.saxpath.XPathHandler;
/**
 * XPath handler for lucene that turns parser callbacks into a {@link PathQuery}.
 *
 * Only basic location paths are handled: name steps on the child axis and all-node steps on
 * the child, descendant-or-self and self axes. Every other construct (predicates, functions,
 * literals, numbers, arithmetic/boolean/relational operators, unions, ...) is rejected with an
 * {@link UnsupportedOperationException}.
 *
 * The namespace prefix resolver must be set before any step is processed, and the dictionary
 * service before {@link #startXPath()} is called, as both are used without further checks.
 *
 * @author andyh
 */
public class LuceneXPathHandler implements XPathHandler
{
    /** The query under construction; created in {@link #startXPath()}. */
    private PathQuery query;

    /** Cleared as soon as a relative location path is started. */
    private boolean isAbsolutePath = true;

    /** Position counter used only by the (currently unused) absolute step builder. */
    int absolutePosition = 0;

    private NamespacePrefixResolver namespacePrefixResolver;

    private DictionaryService dictionaryService;

    /**
     * Default constructor. The collaborators are supplied through the setters.
     */
    public LuceneXPathHandler()
    {
        super();
    }

    /**
     * Get the resulting query
     *
     * @return - the query (null until {@link #startXPath()} has been called)
     */
    public PathQuery getQuery()
    {
        return this.query;
    }

    /** Nothing to do when an absolute location path ends. */
    public void endAbsoluteLocationPath() throws SAXPathException
    {
        // No action
    }

    /**
     * Accepts only the "no operation" case; additive operators are not supported.
     *
     * @param op the operator code
     * @throws UnsupportedOperationException for add, subtract and unknown operators
     */
    public void endAdditiveExpr(int op) throws SAXPathException
    {
        switch (op)
        {
        case Operator.NO_OP:
            break;
        case Operator.ADD:
        case Operator.SUBTRACT:
            throw new UnsupportedOperationException();
        default:
            throw new UnsupportedOperationException("Unknown operation " + op);
        }
    }

    /** Nothing to do when an all-node step ends. */
    public void endAllNodeStep() throws SAXPathException
    {
        // Nothing to do
        // TODO: Predicates
    }

    /**
     * And expressions are not supported.
     *
     * @param create true if an and expression was actually present
     * @throws UnsupportedOperationException if create is true
     */
    public void endAndExpr(boolean create) throws SAXPathException
    {
        if (create)
        {
            throw new UnsupportedOperationException();
        }
    }

    /** Comment node steps are not supported; always throws UnsupportedOperationException. */
    public void endCommentNodeStep() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Accepts only the "no operation" case; equality operators are not supported.
     *
     * @param op the operator code
     * @throws UnsupportedOperationException for equals, not-equals and unknown operators
     */
    public void endEqualityExpr(int op) throws SAXPathException
    {
        switch (op)
        {
        case Operator.NO_OP:
            break;
        case Operator.EQUALS:
        case Operator.NOT_EQUALS:
            throw new UnsupportedOperationException();
        default:
            throw new UnsupportedOperationException("Unknown operation " + op);
        }
    }

    /** Filter expressions are not supported; always throws UnsupportedOperationException. */
    public void endFilterExpr() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Functions are not supported; always throws UnsupportedOperationException. */
    public void endFunction() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Accepts only the "no operation" case; multiplicative operators are not supported.
     *
     * @param op the operator code
     * @throws UnsupportedOperationException for multiply, div, mod and unknown operators
     */
    public void endMultiplicativeExpr(int op) throws SAXPathException
    {
        switch (op)
        {
        case Operator.NO_OP:
            break;
        case Operator.MULTIPLY:
        case Operator.DIV:
        case Operator.MOD:
            throw new UnsupportedOperationException();
        default:
            throw new UnsupportedOperationException("Unknown operation " + op);
        }
    }

    /** Nothing to do when a name step ends. */
    public void endNameStep() throws SAXPathException
    {
        // Do nothing at the moment
        // Could have predicates
    }

    /**
     * Or expressions are not supported.
     *
     * @param create true if an or expression was actually present
     * @throws UnsupportedOperationException if create is true
     */
    public void endOrExpr(boolean create) throws SAXPathException
    {
        if (create)
        {
            throw new UnsupportedOperationException();
        }
    }

    /** Nothing to do: the query has been built step by step. */
    public void endPathExpr() throws SAXPathException
    {
        // Already built
    }

    /** Predicates are not supported; always throws UnsupportedOperationException. */
    public void endPredicate() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Processing instruction steps are not supported; always throws UnsupportedOperationException. */
    public void endProcessingInstructionNodeStep() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Accepts only the "no operation" case; relational operators are not supported.
     *
     * @param op the operator code
     * @throws UnsupportedOperationException for any comparison and for unknown operators
     */
    public void endRelationalExpr(int op) throws SAXPathException
    {
        switch (op)
        {
        case Operator.NO_OP:
            break;
        case Operator.GREATER_THAN:
        case Operator.GREATER_THAN_EQUALS:
        case Operator.LESS_THAN:
        case Operator.LESS_THAN_EQUALS:
            throw new UnsupportedOperationException();
        default:
            throw new UnsupportedOperationException("Unknown operation " + op);
        }
    }

    /** Nothing to do when a relative location path ends. */
    public void endRelativeLocationPath() throws SAXPathException
    {
        // No action
    }

    /** Text node steps are not supported; always throws UnsupportedOperationException. */
    public void endTextNodeStep() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Accepts only the "no operation" case; unary negation is not supported.
     *
     * @param op the operator code
     * @throws UnsupportedOperationException for negation and unknown operators
     */
    public void endUnaryExpr(int op) throws SAXPathException
    {
        switch (op)
        {
        case Operator.NO_OP:
            break;
        case Operator.NEGATIVE:
            throw new UnsupportedOperationException();
        default:
            throw new UnsupportedOperationException("Unknown operation " + op);
        }
    }

    /**
     * Union expressions are not supported.
     *
     * @param create true if a union expression was actually present
     * @throws UnsupportedOperationException if create is true
     */
    public void endUnionExpr(boolean create) throws SAXPathException
    {
        if (create)
        {
            throw new UnsupportedOperationException();
        }
    }

    /** Nothing to do when the XPath ends. */
    public void endXPath() throws SAXPathException
    {
        // Do nothing at the moment
    }

    /** Literals are not supported; always throws UnsupportedOperationException. */
    public void literal(String arg0) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Numbers are not supported; always throws UnsupportedOperationException. */
    public void number(double arg0) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Numbers are not supported; always throws UnsupportedOperationException. */
    public void number(int arg0) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Checks that no relative location path has been started before this absolute one.
     *
     * @throws IllegalStateException if a relative location path was already started
     */
    public void startAbsoluteLocationPath() throws SAXPathException
    {
        if (!isAbsolutePath)
        {
            throw new IllegalStateException();
        }
    }

    /** Nothing to do when an additive expression starts. */
    public void startAdditiveExpr() throws SAXPathException
    {
        // Do nothing at the moment
    }

    /**
     * Appends the query elements for an all-node step.
     *
     * @param axis the axis of the step; child, descendant-or-self and self are handled
     * @throws UnsupportedOperationException for any other axis
     */
    public void startAllNodeStep(int axis) throws SAXPathException
    {
        switch (axis)
        {
        case Axis.CHILD:
            // Absolute and relative paths are treated alike: we can always do relative stuff.
            addRelative(null, null);
            break;
        case Axis.DESCENDANT_OR_SELF:
            query.appendQuery(getArrayList(new DescendantAndSelfStructuredFieldPosition(), new DescendantAndSelfStructuredFieldPosition()));
            break;
        case Axis.SELF:
            query.appendQuery(getArrayList(new SelfAxisStructuredFieldPosition(), new SelfAxisStructuredFieldPosition()));
            break;
        default:
            throw new UnsupportedOperationException();
        }
    }

    /** Builds the two element (namespace, name) list that is appended to the query for one step. */
    private ArrayList<StructuredFieldPosition> getArrayList(StructuredFieldPosition one, StructuredFieldPosition two)
    {
        ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2);
        answer.add(one);
        answer.add(two);
        return answer;
    }

    /** Nothing to do when an and expression starts; it is rejected when it ends. */
    public void startAndExpr() throws SAXPathException
    {
        // Do nothing
    }

    /** Comment node steps are not supported; always throws UnsupportedOperationException. */
    public void startCommentNodeStep(int arg0) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Nothing to do when an equality expression starts; it is rejected when it ends. */
    public void startEqualityExpr() throws SAXPathException
    {
        // Do nothing
    }

    /** Filter expressions are not supported; always throws UnsupportedOperationException. */
    public void startFilterExpr() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Functions are not supported; always throws UnsupportedOperationException. */
    public void startFunction(String arg0, String arg1) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Nothing to do when a multiplicative expression starts. */
    public void startMultiplicativeExpr() throws SAXPathException
    {
        // Do nothing at the moment
    }

    /**
     * Appends the query elements for a name step.
     *
     * @param axis the axis of the step; only the child axis is handled
     * @param nameSpace the namespace prefix of the step (may be null or empty)
     * @param localName the local name of the step ("*" matches any name)
     * @throws UnsupportedOperationException for any axis other than child
     */
    public void startNameStep(int axis, String nameSpace, String localName) throws SAXPathException
    {
        switch (axis)
        {
        case Axis.CHILD:
            // Absolute and relative paths are treated alike: we can always do relative stuff.
            addRelative(nameSpace, localName);
            break;
        default:
            throw new UnsupportedOperationException();
        }
    }

    /**
     * True if the local name stands for "any name". A null name is what the all-node step
     * passes in, so it is treated like "*" rather than being dereferenced.
     */
    private static boolean isAnyName(String localName)
    {
        return (localName == null) || localName.equals("*");
    }

    /**
     * Appends a step whose namespace and name must occur at fixed, increasing positions.
     * Currently unused - all steps are added as relative steps.
     */
    @SuppressWarnings("unused")
    private void addAbsolute(String nameSpace, String localName)
    {
        ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2);
        // TODO: Resolve name space
        absolutePosition++;
        if ((nameSpace == null) || (nameSpace.length() == 0))
        {
            if (isAnyName(localName))
            {
                answer.add(new RelativeStructuredFieldPosition("*"));
            }
            else
            {
                String defaultUri = namespacePrefixResolver.getNamespaceURI("");
                if (defaultUri == null)
                {
                    answer.add(new AbsoluteStructuredFieldPosition(PathTokenFilter.NO_NS_TOKEN_TEXT, absolutePosition));
                }
                else
                {
                    answer.add(new AbsoluteStructuredFieldPosition(defaultUri, absolutePosition));
                }
            }
        }
        else
        {
            answer.add(new AbsoluteStructuredFieldPosition(namespacePrefixResolver.getNamespaceURI(nameSpace), absolutePosition));
        }

        absolutePosition++;
        if ((localName == null) || (localName.length() == 0))
        {
            answer.add(new AbsoluteStructuredFieldPosition("*", absolutePosition));
        }
        else
        {
            answer.add(new AbsoluteStructuredFieldPosition(localName, absolutePosition));
        }
        query.appendQuery(answer);
    }

    /**
     * Appends a relative step made of a namespace element followed by a name element.
     *
     * Without a prefix, a wildcard (or null) name matches any namespace; otherwise the default
     * namespace is used, falling back to the "no namespace" token when there is no default or
     * it is empty. A null or empty local name is added as "*".
     */
    private void addRelative(String nameSpace, String localName)
    {
        ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2);
        if ((nameSpace == null) || (nameSpace.length() == 0))
        {
            if (isAnyName(localName))
            {
                answer.add(new RelativeStructuredFieldPosition("*"));
            }
            else
            {
                String defaultUri = namespacePrefixResolver.getNamespaceURI("");
                if ((defaultUri == null) || defaultUri.equals(""))
                {
                    answer.add(new RelativeStructuredFieldPosition(PathTokenFilter.NO_NS_TOKEN_TEXT));
                }
                else
                {
                    answer.add(new RelativeStructuredFieldPosition(defaultUri));
                }
            }
        }
        else
        {
            answer.add(new RelativeStructuredFieldPosition(namespacePrefixResolver.getNamespaceURI(nameSpace)));
        }

        if ((localName == null) || (localName.length() == 0))
        {
            answer.add(new RelativeStructuredFieldPosition("*"));
        }
        else
        {
            answer.add(new RelativeStructuredFieldPosition(localName));
        }
        query.appendQuery(answer);
    }

    /** Nothing to do when an or expression starts; it is rejected when it ends. */
    public void startOrExpr() throws SAXPathException
    {
        // Do nothing at the moment
    }

    /** Nothing to do when a path expression starts. */
    public void startPathExpr() throws SAXPathException
    {
        // Just need one!
    }

    /** Predicates are not supported; always throws UnsupportedOperationException. */
    public void startPredicate() throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Processing instruction steps are not supported; always throws UnsupportedOperationException. */
    public void startProcessingInstructionNodeStep(int arg0, String arg1) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Nothing to do when a relational expression starts. */
    public void startRelationalExpr() throws SAXPathException
    {
        // Do nothing at the moment
    }

    /** Records that the path is relative. */
    public void startRelativeLocationPath() throws SAXPathException
    {
        isAbsolutePath = false;
    }

    /** Text node steps are not supported; always throws UnsupportedOperationException. */
    public void startTextNodeStep(int arg0) throws SAXPathException
    {
        throw new UnsupportedOperationException();
    }

    /** Nothing to do when a unary expression starts. */
    public void startUnaryExpr() throws SAXPathException
    {
        // Do nothing for now
    }

    /** Nothing to do when a union expression starts; it is rejected when it ends. */
    public void startUnionExpr() throws SAXPathException
    {
        // Do nothing at the moment
    }

    /** Creates the empty query that the following steps are appended to. */
    public void startXPath() throws SAXPathException
    {
        query = new PathQuery(dictionaryService);
    }

    /** Variable references are ignored. */
    public void variableReference(String uri, String localName) throws SAXPathException
    {
        // No action
    }

    /**
     * Set the service to resolve name space prefixes
     *
     * @param namespacePrefixResolver
     */
    public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver)
    {
        this.namespacePrefixResolver = namespacePrefixResolver;
    }

    /**
     * Set the dictionary service.
     *
     * @param dictionaryService
     */
    public void setDictionaryService(DictionaryService dictionaryService)
    {
        this.dictionaryService = dictionaryService;
    }
}

View File

@@ -1,50 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.TermDocs;
/**
 * Accessors for per-document values, each looked up by an integer argument.
 *
 * NOTE(review): n is presumably the lucene document number and the values are presumably
 * cached by implementations (going by the interface name) - confirm against an implementation.
 *
 * @author andyh
 */
public interface CachingIndexReader
{
    /** @return the id for entry n */
    String getId(int n) throws IOException;

    /** @return the path link id for entry n */
    String getPathLinkId(int n) throws IOException;

    /** @return all ids for entry n */
    String[] getIds(int n) throws IOException;

    /** @return the "is category" value for entry n, as a string */
    String getIsCategory(int n) throws IOException;

    /** @return the path for entry n */
    String getPath(int n) throws IOException;

    /** @return the parents for entry n */
    String[] getParents(int n) throws IOException;

    /** @return the link aspects for entry n */
    String[] getLinkAspects(int n) throws IOException;

    /** @return the type for entry n */
    String getType(int n) throws IOException;

    /** @return a term docs enumeration for the node documents */
    TermDocs getNodeDocs() throws IOException;
}

View File

@@ -1,108 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
/**
 * This class matches a term at a specified location.
 *
 * @author andyh
 */
public class AbsoluteStructuredFieldPosition extends AbstractStructuredFieldPosition
{
    int requiredPosition;

    /**
     * Search for a term at the specified position.
     *
     * @param termText the term text, handed to the superclass
     * @param position the position the match has to occur at
     */
    public AbsoluteStructuredFieldPosition(String termText, int position)
    {
        super(termText, true, true);
        requiredPosition = position;
    }

    /**
     * Look for a match at exactly the required position.
     *
     * @param start subtracted from each term position before it is compared
     * @param end the last term position to consider, or -1 for no limit
     * @param offset the current offset; nothing matches once it has reached the required position
     * @return the required position if it is matched, otherwise -1
     * @throws IOException if reading the term positions fails
     */
    public int matches(int start, int end, int offset) throws IOException
    {
        if (offset >= requiredPosition)
        {
            return -1;
        }

        if (getCachingTermPositions() == null)
        {
            // Doing "*": only the position directly after the offset can match
            int candidate = offset + 1;
            if (candidate == requiredPosition)
            {
                return candidate;
            }
            return -1;
        }

        // Doing "termText": replay the term positions from the beginning
        getCachingTermPositions().reset();
        final int total = getCachingTermPositions().freq();
        int seen = 0;
        while (seen < total)
        {
            int real = getCachingTermPositions().nextPosition();
            int adjusted = real - start;
            if ((end != -1) && (real > end))
            {
                return -1;
            }
            if (adjusted > requiredPosition)
            {
                return -1;
            }
            if (adjusted == requiredPosition)
            {
                return adjusted;
            }
            seen++;
        }
        return -1;
    }

    /**
     * @return the position at which this element has to match
     */
    public int getPosition()
    {
        return requiredPosition;
    }

    /**
     * @return a fixed description of this kind of position
     */
    public String getDescription()
    {
        return "Absolute Named child";
    }
}

View File

@@ -1,126 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
/**
 * Base class for structured field positions: holds the term text, the terminal and absolute
 * flags and the term positions to match against, and supplies default answers for the
 * linking related questions.
 */
public abstract class AbstractStructuredFieldPosition implements StructuredFieldPosition
{
    private String termText;

    private boolean isTerminal;

    private boolean isAbsolute;

    private CachingTermPositions tps;

    /**
     * @param termText the term text for this position
     * @param isTerminal the initial value of the terminal flag
     * @param isAbsolute true for an absolute position, false for a relative one
     */
    public AbstractStructuredFieldPosition(String termText, boolean isTerminal, boolean isAbsolute)
    {
        this.isAbsolute = isAbsolute;
        this.isTerminal = isTerminal;
        this.termText = termText;
    }

    /** @return the current terminal flag */
    public boolean isTerminal()
    {
        return isTerminal;
    }

    /** Lets subclasses change the terminal flag. */
    protected void setTerminal(boolean isTerminal)
    {
        this.isTerminal = isTerminal;
    }

    /** @return true if this position was created as absolute */
    public boolean isAbsolute()
    {
        return isAbsolute;
    }

    /** @return the negation of {@link #isAbsolute()}'s underlying flag */
    public boolean isRelative()
    {
        return !isAbsolute;
    }

    /** @return the term text given at construction */
    public String getTermText()
    {
        return termText;
    }

    /** @return -1 by default; subclasses with a fixed position override this */
    public int getPosition()
    {
        return -1;
    }

    /** Sets the term positions this element matches against (may be null). */
    public void setCachingTermPositions(CachingTermPositions tps)
    {
        this.tps = tps;
    }

    /** @return the term positions last set, or null if none were set */
    public CachingTermPositions getCachingTermPositions()
    {
        return tps;
    }

    /** @return false by default */
    public boolean allowsLinkingBySelf()
    {
        return false;
    }

    /** @return true by default */
    public boolean allowslinkingByParent()
    {
        return true;
    }

    /** @return true by default */
    public boolean linkParent()
    {
        return true;
    }

    /** @return false by default */
    public boolean linkSelf()
    {
        return false;
    }

    /**
     * @return the description followed by the term text, position and both flags
     */
    public String toString()
    {
        return getDescription()
                + "<" + getTermText() + "> at " + getPosition()
                + " Terminal = " + isTerminal()
                + " Absolute = " + isAbsolute();
    }

    /** @return a short description of the concrete kind of position */
    public abstract String getDescription();

    /** @return false by default */
    public boolean isDescendant()
    {
        return false;
    }

    /** @return true when no term positions are set */
    public boolean matchesAll()
    {
        return getCachingTermPositions() == null;
    }
}

View File

@@ -1,93 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
/**
 * A relative structured field position that matches at any position after the current offset
 * (the "//" and "//name" cases).
 *
 * @author andyh
 */
public class AnyStructuredFieldPosition extends AbstractStructuredFieldPosition
{
    /**
     * Create a position for the given term; it starts out terminal unless the term text is null.
     *
     * @param termText the term text, may be null
     */
    public AnyStructuredFieldPosition(String termText)
    {
        super(termText, true, false);
        if (null == termText)
        {
            setTerminal(false);
        }
    }

    /**
     * Create a non-terminal position without any term text.
     */
    public AnyStructuredFieldPosition()
    {
        super(null, false, false);
    }

    /**
     * Find the first term position that lies beyond the offset.
     *
     * As a side effect the terminal flag is set to true when term positions are available and
     * to false when they are not.
     *
     * @param start subtracted from each term position before it is compared
     * @param end the last term position to consider, or -1 for no limit
     * @param offset the current offset
     * @return the offset itself when there are no term positions; otherwise the first adjusted
     *         position greater than the offset, or -1 if there is none within the limit
     * @throws IOException if reading the term positions fails
     */
    public int matches(int start, int end, int offset) throws IOException
    {
        if (getCachingTermPositions() == null)
        {
            // we are doing //
            setTerminal(false);
            return offset;
        }

        // we are doing //name
        setTerminal(true);
        getCachingTermPositions().reset();
        final int total = getCachingTermPositions().freq();
        for (int seen = 0; seen < total; seen++)
        {
            int real = getCachingTermPositions().nextPosition();
            if ((end != -1) && (real > end))
            {
                return -1;
            }
            int adjusted = real - start;
            if (adjusted > offset)
            {
                return adjusted;
            }
        }
        return -1;
    }

    /**
     * @return a fixed description of this kind of position
     */
    public String getDescription()
    {
        return "Any";
    }
}

View File

@@ -1,197 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermPositions;
/**
 * A {@link TermPositions} wrapper that remembers the positions it has read so that they can be
 * replayed after a call to {@link #reset()}.
 *
 * Everything else is delegated; the remembered positions are discarded whenever the delegate is
 * moved (seek, successful next/skipTo, read) or closed.
 *
 * @author andyh
 */
public class CachingTermPositions implements TermPositions
{
    /** Positions read so far; allocated lazily with room for freq() entries. */
    int[] results;

    /** Index of the entry returned by the last nextPosition() call, -1 before the first. */
    int position = -1;

    /** Highest index that has been filled from the delegate, -1 if none. */
    int last = -1;

    TermPositions delegate;

    CachingTermPositions(TermPositions delegate)
    {
        this.delegate = delegate;
    }

    /**
     * Return the next position, reading it from the delegate only if it has not been read
     * before.
     *
     * @see org.apache.lucene.index.TermPositions#nextPosition()
     */
    public int nextPosition() throws IOException
    {
        if (null == results)
        {
            results = new int[freq()];
        }
        position += 1;
        if (position > last)
        {
            results[position] = delegate.nextPosition();
            last = position;
        }
        return results[position];
    }

    /**
     * Rewind so that the next call to nextPosition() starts again from the first position,
     * keeping what has been read so far.
     */
    public void reset()
    {
        position = -1;
    }

    /** Forget all remembered positions. */
    private void clear()
    {
        results = null;
        last = -1;
        position = -1;
    }

    /**
     * Delegates, then discards the remembered positions.
     *
     * @see org.apache.lucene.index.TermDocs#seek(org.apache.lucene.index.Term)
     */
    public void seek(Term term) throws IOException
    {
        delegate.seek(term);
        clear();
    }

    /**
     * Delegates, then discards the remembered positions.
     *
     * @see org.apache.lucene.index.TermDocs#seek(org.apache.lucene.index.TermEnum)
     */
    public void seek(TermEnum termEnum) throws IOException
    {
        delegate.seek(termEnum);
        clear();
    }

    /**
     * @see org.apache.lucene.index.TermDocs#doc()
     */
    public int doc()
    {
        return delegate.doc();
    }

    /**
     * @see org.apache.lucene.index.TermDocs#freq()
     */
    public int freq()
    {
        return delegate.freq();
    }

    /**
     * Delegates; the remembered positions are discarded only if the delegate moved on.
     *
     * @see org.apache.lucene.index.TermDocs#next()
     */
    public boolean next() throws IOException
    {
        boolean moved = delegate.next();
        if (moved)
        {
            clear();
        }
        return moved;
    }

    /**
     * Delegates, then discards the remembered positions.
     *
     * @see org.apache.lucene.index.TermDocs#read(int[], int[])
     */
    public int read(int[] docs, int[] freqs) throws IOException
    {
        int count = delegate.read(docs, freqs);
        clear();
        return count;
    }

    /**
     * Delegates; the remembered positions are discarded only if the delegate moved on.
     *
     * @see org.apache.lucene.index.TermDocs#skipTo(int)
     */
    public boolean skipTo(int target) throws IOException
    {
        boolean moved = delegate.skipTo(target);
        if (moved)
        {
            clear();
        }
        return moved;
    }

    /**
     * Closes the delegate, then discards the remembered positions.
     *
     * @see org.apache.lucene.index.TermDocs#close()
     */
    public void close() throws IOException
    {
        delegate.close();
        clear();
    }

    /** Straight delegation; payloads are not cached. */
    public byte[] getPayload(byte[] data, int offset) throws IOException
    {
        return delegate.getPayload(data, offset);
    }

    /** Straight delegation. */
    public int getPayloadLength()
    {
        return delegate.getPayloadLength();
    }

    /** Straight delegation. */
    public boolean isPayloadAvailable()
    {
        return delegate.isPayloadAvailable();
    }
}

View File

@@ -1,56 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery;
/**
 * A multi term query that matches a field value regardless of case.
 *
 * @author andyh
 */
public class CaseInsensitiveFieldQuery extends MultiTermQuery
{
    private static final long serialVersionUID = -2570803495329346982L;

    /**
     * @param term - the term for the match
     */
    public CaseInsensitiveFieldQuery(Term term)
    {
        super(term);
    }

    /**
     * Build the term enumeration from a fresh copy of this query's term (same field and text).
     *
     * @param reader the index reader to enumerate terms from
     * @return a case insensitive term enumeration for the term
     * @throws IOException if the enumeration can not be created
     */
    @Override
    protected FilteredTermEnum getEnum(IndexReader reader) throws IOException
    {
        return new CaseInsensitiveTermEnum(reader, new Term(getTerm().field(), getTerm().text()));
    }
}

View File

@@ -1,78 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery;
/**
 * A multi term query for the terms of a field that lie within a range, ignoring case.
 *
 * @author andyh
 */
public class CaseInsensitiveFieldRangeQuery extends MultiTermQuery
{
    private static final long serialVersionUID = -5859977841901861122L;

    String expandedFieldName;

    String lowerTermText;

    String upperTermText;

    boolean includeLower;

    boolean includeUpper;

    /**
     * @param expandedFieldName -
     *            field
     * @param lowerTermText -
     *            lower range value
     * @param upperTermText -
     *            upper range value
     * @param includeLower -
     *            include the lower value
     * @param includeUpper -
     *            include the upper value
     */
    public CaseInsensitiveFieldRangeQuery(String expandedFieldName, String lowerTermText, String upperTermText, boolean includeLower, boolean includeUpper)
    {
        // The query's own term only carries the field name; the range is kept in the fields below
        super(new Term(expandedFieldName, ""));
        this.expandedFieldName = expandedFieldName;
        this.lowerTermText = lowerTermText;
        this.includeLower = includeLower;
        this.upperTermText = upperTermText;
        this.includeUpper = includeUpper;
    }

    /**
     * Create the range enumeration, handing on the field name, both bounds and both
     * inclusion flags unchanged.
     *
     * @param reader the index reader to enumerate terms from
     * @return the case insensitive range enumeration
     * @throws IOException if the enumeration can not be created
     */
    @Override
    protected FilteredTermEnum getEnum(IndexReader reader) throws IOException
    {
        return new CaseInsensitiveTermRangeEnum(
                reader,
                expandedFieldName,
                lowerTermText,
                upperTermText,
                includeLower,
                includeUpper);
    }
}

View File

@@ -1,80 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FilteredTermEnum;
/**
 * A term enum that accepts the terms of one field whose text equals the search text when case
 * is ignored - used for Upper and Lower.
 *
 * @author andyh
 */
public class CaseInsensitiveTermEnum extends FilteredTermEnum
{
    private String field = "";

    private boolean endEnum = false;

    private String text;

    /**
     * @param reader -
     *            the index reader
     * @param term -
     *            the term to match
     * @throws IOException
     */
    public CaseInsensitiveTermEnum(IndexReader reader, Term term) throws IOException
    {
        // The field and text are assigned before the enumeration is installed
        text = term.text();
        field = term.field();
        // position at the start of the field - we could do slightly better
        setEnum(reader.terms(new Term(term.field(), "")));
    }

    /**
     * @return always 1.0
     */
    @Override
    public float difference()
    {
        return 1.0f;
    }

    /**
     * @return true once a term from a different field has been seen
     */
    @Override
    protected boolean endEnum()
    {
        return endEnum;
    }

    /**
     * Accept a term of the searched field if its text equals the search text ignoring case.
     * A term of any other field marks the enumeration as finished.
     *
     * @param term the candidate term
     * @return true if the term is accepted
     */
    @Override
    protected boolean termCompare(Term term)
    {
        if (!field.equals(term.field()))
        {
            endEnum = true;
            return false;
        }
        return term.text().equalsIgnoreCase(text);
    }
}

View File

@@ -1,123 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FilteredTermEnum;
/**
 * A term enumeration that accepts the terms of one field whose lower-cased text lies within a range.
 * Both bounds and every candidate are lower-cased with {@link String#toLowerCase()} before they are
 * compared.
 *
 * @author andyh
 */
public class CaseInsensitiveTermRangeEnum extends FilteredTermEnum
{
    // Becomes true once a term from another field has been seen
    private boolean endEnum = false;

    String expandedFieldName;

    String lowerTermText;

    String upperTermText;

    boolean includeLower;

    boolean includeUpper;

    /**
     * Creates the enumeration and positions it on the first term of the field.
     *
     * @param reader
     *            the index reader
     * @param expandedFieldName
     *            the field whose terms are scanned
     * @param lowerTermText
     *            the lower range value
     * @param upperTermText
     *            the upper range value
     * @param includeLower
     *            include the lower value
     * @param includeUpper
     *            include the upper value
     * @throws IOException
     *             if the underlying term enumeration cannot be opened
     */
    public CaseInsensitiveTermRangeEnum(IndexReader reader, String expandedFieldName, String lowerTermText, String upperTermText, boolean includeLower, boolean includeUpper)
            throws IOException
    {
        super();
        this.expandedFieldName = expandedFieldName;
        this.includeLower = includeLower;
        this.includeUpper = includeUpper;
        this.lowerTermText = lowerTermText.toLowerCase();
        this.upperTermText = upperTermText.toLowerCase();
        // All state must be in place before setEnum() because it calls termCompare()
        Term firstOfField = new Term(expandedFieldName, "");
        setEnum(reader.terms(firstOfField));
    }

    /**
     * @return always 1.0f - every accepted term is treated as an equally good match
     */
    @Override
    public float difference()
    {
        return 1.0f;
    }

    /**
     * @return true once the enumeration has moved past the field of interest
     */
    @Override
    protected boolean endEnum()
    {
        return endEnum;
    }

    /**
     * Accepts a term of the field of interest whose lower-cased text is inside the range. The first term
     * seen from any other field marks the end of the enumeration.
     *
     * @param term
     *            the candidate term
     * @return true if the candidate is accepted
     */
    @Override
    protected boolean termCompare(Term term)
    {
        if (!expandedFieldName.equals(term.field()))
        {
            endEnum = true;
            return false;
        }
        String candidate = term.text().toLowerCase();
        if (!checkLower(candidate))
        {
            return false;
        }
        return checkUpper(candidate);
    }

    /**
     * @return true if the text is above the lower bound (or equal to it when the bound is inclusive)
     */
    private boolean checkLower(String searchText)
    {
        int order = lowerTermText.compareTo(searchText);
        return includeLower ? (order <= 0) : (order < 0);
    }

    /**
     * @return true if the text is below the upper bound (or equal to it when the bound is inclusive)
     */
    private boolean checkUpper(String searchText)
    {
        int order = upperTermText.compareTo(searchText);
        return includeUpper ? (order >= 0) : (order > 0);
    }
}

View File

@@ -1,555 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;
/**
 * The scorer for structured field queries.
 *
 * A document either matches or it does not; {@link #score()} always returns 1.0f.
 *
 * The scorer works in one of two modes, chosen by {@link #allContainers()}: when no position carries
 * term positions it walks every container document, otherwise it intersects the term positions of all
 * positions (and the root term) and then checks the structure of each candidate document.
 *
 * @author andyh
 */
public class ContainerScorer extends Scorer
{
    // Unused
    Weight weight;

    // Positions of documents with multiple structure elements
    // e.g. have multiple paths, multiple categories or multiple entries in the
    // same category
    TermPositions root;

    // The field positions that describe the structure we are trying to match
    StructuredFieldPosition[] positions;

    // Unused at the moment
    byte[] norms;

    // The minimum document found so far
    int min = 0;

    // The max document found so far
    int max = 0;

    // The document the root term positions are currently on
    int rootDoc = 0;

    // Are there potentially more documents
    boolean more = true;

    // Reported by explain() only; never updated by this class, so it stays at 0.0f
    float freq = 0.0f;

    // A term position to find all container entries (there is no better way of finding the set of
    // required containers)
    private TermPositions containers;

    /**
     * The arguments here follow the same pattern as used by the PhraseQuery. (It has the same unused
     * arguments)
     *
     * @param weight
     *            currently unused
     * @param root
     *            the term positions for the root marker of each path; it is dereferenced unconditionally
     *            by this class
     * @param positions
     *            the structured field positions - where terms should appear
     * @param containers
     *            the term positions used to enumerate all container documents
     * @param similarity
     *            used in the abstract scorer implementation
     * @param norms
     *            unused
     */
    public ContainerScorer(Weight weight, TermPositions root, StructuredFieldPosition[] positions, TermPositions containers, Similarity similarity, byte[] norms)
    {
        super(similarity);
        this.weight = weight;
        this.positions = positions;
        this.norms = norms;
        this.root = root;
        this.containers = containers;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lucene.search.Scorer#next()
     */
    public boolean next() throws IOException
    {
        // If there is no filtering
        if (allContainers())
        {
            // containers and roots must be in sync or the index is broken
            while (more)
            {
                if (containers.next() && root.next())
                {
                    if (check(0, root.nextPosition()))
                    {
                        return true;
                    }
                }
                else
                {
                    more = false;
                    return false;
                }
            }
        }
        if (!more)
        {
            // One of the search terms has no more documents
            return false;
        }
        if (max == 0)
        {
            // We need to initialise
            // Just do a next on all terms and check if the first doc matches
            doNextOnAll();
            if (found())
            {
                return true;
            }
            // drop through to the normal find sequence
        }
        return findNext();
    }

    /**
     * Are we looking for all containers? This is the case when there are no positions at all, or when
     * none of the positions has term positions attached.
     *
     * @return true if every container document has to be visited
     */
    private boolean allContainers()
    {
        if (positions.length == 0)
        {
            return true;
        }
        for (StructuredFieldPosition sfp : positions)
        {
            if (sfp.getCachingTermPositions() != null)
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Moves forward until a matching document is found or the terms are exhausted.
     *
     * @return true if a match was found
     * @throws IOException
     */
    private boolean findNext() throws IOException
    {
        // Move to the next document
        while (more)
        {
            move(); // may set more to false
            if (found())
            {
                return true;
            }
        }
        // If we get here we must have no more documents
        return false;
    }

    /**
     * Check if we have found a match
     *
     * @return true if the current document matches the structure
     * @throws IOException
     */
    private boolean found() throws IOException
    {
        // No predicate test if there are no positions
        if (positions.length == 0)
        {
            return true;
        }

        // no more documents - no match
        if (!more)
        {
            return false;
        }

        // min and max must point to the same document
        if (min != max)
        {
            return false;
        }

        if (rootDoc != max)
        {
            return false;
        }

        // We have duplicate entries - support should be improved but it is not used at the moment
        // This should work akin to the leaf scorer
        // It would compact the index
        // The match must be in a known term range
        int count = root.freq();
        int start = 0;
        int end = -1;
        for (int i = 0; i < count; i++)
        {
            // The first range starts at zero, each later one just after the previous root position
            start = (i == 0) ? 0 : end + 1;
            end = root.nextPosition();

            if (check(start, end))
            {
                return true;
            }
        }

        // We had checks to do and they all failed.
        return false;
    }

    /*
     * We have all documents at the same state. Now we check the positions of
     * the terms.
     */
    private boolean check(int start, int end) throws IOException
    {
        int offset = checkTail(start, end, 0, 0);
        // Last match may fail
        if (offset == -1)
        {
            return false;
        }
        // Check non // ending patterns end at the end of the available pattern
        if (positions[positions.length - 1].isTerminal())
        {
            return ((offset + 1) == end);
        }
        return true;
    }

    /**
     * For // type pattern matches we need to test patterns of variable greediness: after a descendant
     * position the rest of the pattern is retried from every offset up to the end of the range.
     *
     * @param start
     *            first position of the range being matched
     * @param end
     *            last position of the range being matched
     * @param currentPosition
     *            index of the first entry of {@code positions} still to match
     * @param currentOffset
     *            offset to resume matching from
     * @return the offset reached by the last position, or -1 if the pattern does not match
     * @throws IOException
     */
    private int checkTail(int start, int end, int currentPosition, int currentOffset) throws IOException
    {
        int offset = currentOffset;
        for (int i = currentPosition, l = positions.length; i < l; i++)
        {
            offset = positions[i].matches(start, end, offset);
            if (offset == -1)
            {
                return -1;
            }
            if (positions[i].isDescendant())
            {
                for (int j = offset; j < end; j++)
                {
                    int newOffset = checkTail(start, end, i + 1, j);
                    if (newOffset != -1)
                    {
                        return newOffset;
                    }
                }
                return -1;
            }
        }
        return offset;
    }

    /*
     * Move to the next position to consider for a match test
     */
    private void move() throws IOException
    {
        if (min == max)
        {
            // If we were at a match just do next on all terms
            // They all must move on
            doNextOnAll();
        }
        else
        {
            // We are in a range - try and skip to the max position on all terms
            // Only some need to move on - some may move past the current max and set a new target
            skipToMax();
        }
    }

    /*
     * Go through all the term positions and try and move to next document. Any
     * failure means we have no more.
     *
     * This can be used at initialisation and when moving away from an existing
     * match.
     *
     * This will set min, max, more and rootDoc
     *
     */
    private void doNextOnAll() throws IOException
    {
        // Do the terms
        int current;
        boolean first = true;
        for (int i = 0, l = positions.length; i < l; i++)
        {
            if (positions[i].getCachingTermPositions() != null)
            {
                if (positions[i].getCachingTermPositions().next())
                {
                    current = positions[i].getCachingTermPositions().doc();
                    adjustMinMax(current, first);
                    first = false;
                }
                else
                {
                    more = false;
                    return;
                }
            }
        }

        // Do the root term - it must always exist as the path could well have multiple entries
        // If an entry in the index does not have a root terminal it is broken
        if (root.next())
        {
            rootDoc = root.doc();
        }
        else
        {
            more = false;
            return;
        }
        if (root.doc() < max)
        {
            if (root.skipTo(max))
            {
                rootDoc = root.doc();
            }
            else
            {
                more = false;
                return;
            }
        }
    }

    /*
     * Try and skip all those term positions at documents less than the current
     * max up to value. This is quite likely to fail and leave us with (min !=
     * max) but that is OK, we try again.
     *
     * It is possible that max increases as we process terms, this is OK. We
     * just failed to skip to a given value of max and start doing the next.
     */
    private void skipToMax() throws IOException
    {
        // Do the terms
        int current;
        for (int i = 0, l = positions.length; i < l; i++)
        {
            if (i == 0)
            {
                min = max;
            }
            if (positions[i].getCachingTermPositions() != null)
            {
                if (positions[i].getCachingTermPositions().doc() < max)
                {
                    if (positions[i].getCachingTermPositions().skipTo(max))
                    {
                        current = positions[i].getCachingTermPositions().doc();
                        adjustMinMax(current, false);
                    }
                    else
                    {
                        more = false;
                        return;
                    }
                }
            }
        }

        // Do the root
        if (root.doc() < max)
        {
            if (root.skipTo(max))
            {
                rootDoc = root.doc();
            }
            else
            {
                more = false;
                return;
            }
        }
    }

    /*
     * Adjust the min and max values. Convenience boolean to set or adjust the
     * minimum.
     */
    private void adjustMinMax(int doc, boolean setMin)
    {
        if (max < doc)
        {
            max = doc;
        }

        if (setMin)
        {
            min = doc;
        }
        else if (min > doc)
        {
            min = doc;
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lucene.search.Scorer#doc()
     */
    public int doc()
    {
        if (allContainers())
        {
            return containers.doc();
        }
        return max;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lucene.search.Scorer#score()
     */
    public float score() throws IOException
    {
        return 1.0f;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lucene.search.Scorer#skipTo(int)
     */
    public boolean skipTo(int target) throws IOException
    {
        if (allContainers())
        {
            if (!more)
            {
                // Already exhausted - do not touch the enumerations again
                return false;
            }
            // skipTo() returns false when no document at or beyond the target exists; doc() and
            // nextPosition() must not be used after that, so treat it as the end of the results.
            // The root must land on the same document as the container (must match).
            if (!containers.skipTo(target) || !root.skipTo(containers.doc()))
            {
                more = false;
                return false;
            }
            if (check(0, root.nextPosition()))
            {
                return true;
            }
            while (more)
            {
                if (containers.next() && root.next())
                {
                    if (check(0, root.nextPosition()))
                    {
                        return true;
                    }
                }
                else
                {
                    more = false;
                    return false;
                }
            }
        }

        max = target;
        return findNext();
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.lucene.search.Scorer#explain(int)
     */
    public Explanation explain(int doc) throws IOException
    {
        // TODO: Work out what a proper explanation would be here?
        Explanation tfExplanation = new Explanation();

        // Advance until we reach or pass the requested document
        while (next() && doc() < doc)
        {
        }

        float phraseFreq = (doc() == doc) ? freq : 0.0f;
        tfExplanation.setValue(getSimilarity().tf(phraseFreq));
        tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");

        return tfExplanation;
    }
}

View File

@@ -1,240 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermPositions;
/**
 * A multi reader that additionally hides a set of excluded documents per sub-reader.
 *
 * {@code deletions[i]} holds the excluded document numbers of sub-reader {@code i}, relative to the
 * start of that sub-reader. The arrays are looked up with a binary search, so they are assumed to be
 * sorted in ascending order - TODO confirm against the callers that build them.
 */
public class DeltaReader extends MultiReader
{
    int[][] deletions;

    // Lazily computed: does any sub-reader have excluded documents? null until first asked.
    Boolean hasExclusions = null;

    private IndexReader[] subReaders;

    private int maxDoc = 0;

    // starts[i] is the first document number of sub-reader i; the final entry is the total maxDoc
    private int[] starts;

    /**
     * @param readers
     *            the sub-readers to combine
     * @param deletions
     *            for each sub-reader, the reader-relative document numbers to hide
     * @throws IOException
     */
    public DeltaReader(IndexReader[] readers, int[][] deletions) throws IOException
    {
        super(readers);
        this.deletions = deletions;
        initialize(readers);
    }

    /**
     * Records the sub-readers and builds the table of starting document numbers.
     */
    private void initialize(IndexReader[] subReaders) throws IOException
    {
        this.subReaders = subReaders;
        starts = new int[subReaders.length + 1]; // build starts array
        for (int i = 0; i < subReaders.length; i++)
        {
            starts[i] = maxDoc;
            maxDoc += subReaders[i].maxDoc(); // compute maxDocs
        }
        starts[subReaders.length] = maxDoc;
    }

    /** Not supported by this reader. */
    protected void doCommit() throws IOException
    {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this reader. */
    protected void doDelete(int arg0) throws IOException
    {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this reader. */
    protected void doUndeleteAll() throws IOException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * @return true if a sub-reader has deletions or any document is excluded by this reader
     */
    public boolean hasDeletions()
    {
        return super.hasDeletions() || hasSearchExclusions();
    }

    /**
     * Determines (once) whether any sub-reader has excluded documents.
     */
    private boolean hasSearchExclusions()
    {
        if (hasExclusions == null)
        {
            boolean anyExcluded = false;
            for (int[] excluded : deletions)
            {
                if (excluded.length > 0)
                {
                    anyExcluded = true;
                    break;
                }
            }
            // Cache the result of the scan (previously the value was unconditionally reset to false)
            hasExclusions = Boolean.valueOf(anyExcluded);
        }
        return hasExclusions.booleanValue();
    }

    /**
     * @param docNumber
     *            a document number in the combined numbering
     * @return true if the document is deleted in its sub-reader or excluded by this reader
     */
    public boolean isDeleted(int docNumber)
    {
        int i = readerIndex(docNumber);
        // binarySearch returns a non-negative index only when the key is present; a miss yields
        // -(insertionPoint) - 1, which is -1 only when the insertion point is 0.
        return super.isDeleted(docNumber) || (Arrays.binarySearch(deletions[i], docNumber - starts[i]) >= 0);
    }

    /**
     * Finds the index of the sub-reader that contains document {@code n}.
     */
    private int readerIndex(int n)
    { // find reader for doc n:
        int lo = 0; // search starts array
        int hi = subReaders.length - 1; // for first element less

        while (hi >= lo)
        {
            int mid = (lo + hi) >>> 1;
            int midValue = starts[mid];
            if (n < midValue)
            {
                hi = mid - 1;
            }
            else if (n > midValue)
            {
                lo = mid + 1;
            }
            else
            { // found a match
                while (mid + 1 < subReaders.length && starts[mid + 1] == midValue)
                {
                    mid++; // scan to last match
                }
                return mid;
            }
        }
        return hi;
    }

    /**
     * @return term docs over all sub-readers that skip deleted and excluded documents
     */
    public TermDocs termDocs() throws IOException
    {
        return new DeletingTermDocs(super.termDocs());
    }

    /** Not supported by this reader. */
    public TermPositions termPositions() throws IOException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Wraps term docs and filters out every document for which {@link DeltaReader#isDeleted(int)} is
     * true.
     */
    private class DeletingTermDocs implements TermDocs
    {
        TermDocs delegate;

        DeletingTermDocs(TermDocs delegate)
        {
            this.delegate = delegate;
        }

        public void seek(Term term) throws IOException
        {
            delegate.seek(term);
        }

        public void seek(TermEnum termEnum) throws IOException
        {
            delegate.seek(termEnum);
        }

        public int doc()
        {
            return delegate.doc();
        }

        public int freq()
        {
            return delegate.freq();
        }

        /**
         * Moves to the next document that is not deleted.
         */
        public boolean next() throws IOException
        {
            while (delegate.next())
            {
                if (!isDeleted(doc()))
                {
                    return true;
                }
            }
            return false;
        }

        /**
         * Reads the next batch of live documents into the arrays.
         *
         * @return the number of live entries written, or 0 when the delegate is exhausted
         */
        public int read(int[] docs, int[] freqs) throws IOException
        {
            while (true)
            {
                int end = delegate.read(docs, freqs);
                if (end == 0)
                {
                    return 0;
                }
                // Compact the live entries to the front, testing the document number (not the index)
                int position = 0;
                for (int i = 0; i < end; i++)
                {
                    if (!isDeleted(docs[i]))
                    {
                        docs[position] = docs[i];
                        freqs[position] = freqs[i];
                        position++;
                    }
                }
                if (position > 0)
                {
                    return position;
                }
                // The whole batch was deleted: returning 0 would signal the end, so read again
            }
        }

        /**
         * Skips to the first live document at or beyond the given document number.
         */
        public boolean skipTo(int docNumber) throws IOException
        {
            if (!delegate.skipTo(docNumber))
            {
                // Nothing at or beyond the target - doc() is not valid here
                return false;
            }
            if (!isDeleted(doc()))
            {
                return true;
            }
            return next();
        }

        public void close() throws IOException
        {
            delegate.close();
        }
    }
}

View File

@@ -1,45 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
/**
 * The structured field position for the "descendant and self" axis: it reports itself as a descendant
 * position and allows the following position to link by self.
 */
public class DescendantAndSelfStructuredFieldPosition extends AnyStructuredFieldPosition
{
    /**
     * Creates the position using the parent's default construction.
     */
    public DescendantAndSelfStructuredFieldPosition()
    {
        // nothing beyond the implicit call to the parent constructor
    }

    /**
     * @return always true
     */
    public boolean isDescendant()
    {
        return true;
    }

    /**
     * @return always true
     */
    public boolean allowsLinkingBySelf()
    {
        return true;
    }

    /**
     * @return the fixed description of this axis
     */
    public String getDescription()
    {
        return "Descendant and Self Axis";
    }
}

View File

@@ -1,386 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Weight;
/**
 * An extension to the Lucene query set.
 *
 * This query supports structured queries against paths.
 *
 * The field must have been tokenised using the path tokeniser.
 *
 * This class manages linking together an ordered chain of absolute and relative positional queries. The
 * chain is held in two parts: the leading steps (path positions) and the final step (qname positions),
 * each step being {@code unitSize} positions long.
 *
 * @author Andy Hind
 */
public class PathQuery extends Query
{
    private static final long serialVersionUID = 3832904355660707892L;

    private String pathField = "PATH";

    private String qNameField = "QNAME";

    // Number of positions that make up one step of the path
    private int unitSize = 2;

    private List<StructuredFieldPosition> pathStructuredFieldPositions = new ArrayList<StructuredFieldPosition>();

    private List<StructuredFieldPosition> qNameStructuredFieldPositions = new ArrayList<StructuredFieldPosition>();

    private DictionaryService dictionarySertvice;

    private boolean repeats = false;

    /**
     * Creates an empty path query.
     *
     * @param dictionarySertvice
     *            the dictionary service handed on to the scorer
     */
    public PathQuery(DictionaryService dictionarySertvice)
    {
        super();
        this.dictionarySertvice = dictionarySertvice;
    }

    /**
     * Replaces the whole query. The existing positions are discarded before the arguments are checked.
     *
     * @param path
     *            the positions of the leading steps; its size must be a multiple of the unit size
     * @param qname
     *            the positions of the final step; its size must equal the unit size
     * @throws UnsupportedOperationException
     *             if either list has an unsupported size
     */
    public void setQuery(List<StructuredFieldPosition> path, List<StructuredFieldPosition> qname)
    {
        qNameStructuredFieldPositions.clear();
        pathStructuredFieldPositions.clear();

        boolean qnameIsOneUnit = (qname.size() == unitSize);
        if (!qnameIsOneUnit)
        {
            throw new UnsupportedOperationException();
        }
        boolean pathIsWholeUnits = (path.size() % unitSize == 0);
        if (!pathIsWholeUnits)
        {
            throw new UnsupportedOperationException();
        }

        qNameStructuredFieldPositions.addAll(qname);
        pathStructuredFieldPositions.addAll(path);
    }

    /**
     * Appends one step to the query. The step is silently ignored when it links by parent or by self and
     * the current last position does not allow that kind of link. Otherwise the current final step moves
     * to the path part and the new step becomes the final step.
     *
     * @param sfps
     *            the positions of the step; its size must equal the unit size
     * @throws UnsupportedOperationException
     *             if the step has an unsupported size
     */
    public void appendQuery(List<StructuredFieldPosition> sfps)
    {
        if (sfps.size() != unitSize)
        {
            throw new UnsupportedOperationException();
        }

        StructuredFieldPosition first = sfps.get(0);
        StructuredFieldPosition previous = getLast();
        if (previous != null)
        {
            if (first.linkParent() && !previous.allowslinkingByParent())
            {
                return;
            }
            if (first.linkSelf() && !previous.allowsLinkingBySelf())
            {
                return;
            }
        }

        if (qNameStructuredFieldPositions.size() == unitSize)
        {
            pathStructuredFieldPositions.addAll(qNameStructuredFieldPositions);
        }
        qNameStructuredFieldPositions.clear();
        qNameStructuredFieldPositions.addAll(sfps);
    }

    public String getPathField()
    {
        return pathField;
    }

    public void setPathField(String pathField)
    {
        this.pathField = pathField;
    }

    public String getQnameField()
    {
        return qNameField;
    }

    public void setQnameField(String qnameField)
    {
        this.qNameField = qnameField;
    }

    /**
     * @return the term ";" in the path field
     */
    public Term getPathRootTerm()
    {
        String field = getPathField();
        return new Term(field, ";");
    }

    /**
     * @return the term ";" in the qname field
     */
    public Term getQNameRootTerm()
    {
        String field = getQnameField();
        return new Term(field, ";");
    }

    /*
     * @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher)
     */
    protected Weight createWeight(Searcher searcher)
    {
        return new StructuredFieldWeight(searcher);
    }

    /*
     * Renders the path positions followed by the qname positions; positions alternate between a '/'
     * and a ':' prefix and positions that match everything are shown as "*".
     *
     * @see java.lang.Object#toString()
     */
    public String toString()
    {
        StringBuilder rendered = new StringBuilder();
        rendered.append("<PATH>:");
        int index = appendPositions(rendered, pathStructuredFieldPositions, 0);
        appendPositions(rendered, qNameStructuredFieldPositions, index);
        return rendered.toString();
    }

    /*
     * Appends the given positions to the builder, numbering them from the given index, and returns the
     * index following the last position written.
     */
    private int appendPositions(StringBuilder target, List<StructuredFieldPosition> sfps, int index)
    {
        for (StructuredFieldPosition sfp : sfps)
        {
            target.append((index % 2 == 0) ? '/' : ':');
            if (sfp.matchesAll())
            {
                target.append("*");
            }
            else
            {
                target.append(sfp.getTermText());
            }
            index++;
        }
        return index;
    }

    /*
     * @see org.apache.lucene.search.Query#toString(java.lang.String)
     */
    public String toString(String field)
    {
        return toString();
    }

    /**
     * The weight for a path query; it creates the path scorer.
     */
    private class StructuredFieldWeight implements Weight
    {
        private static final long serialVersionUID = 3257854259645985328L;

        private Searcher searcher;

        private float value;

        private float idf;

        private float queryNorm;

        private float queryWeight;

        public StructuredFieldWeight(Searcher searcher)
        {
            this.searcher = searcher;
        }

        /*
         * Explanations are not supported.
         *
         * @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int)
         */
        public Explanation explain(IndexReader reader, int doc) throws IOException
        {
            throw new UnsupportedOperationException();
        }

        /*
         * @see org.apache.lucene.search.Weight#getQuery()
         */
        public Query getQuery()
        {
            return PathQuery.this;
        }

        /*
         * @see org.apache.lucene.search.Weight#getValue()
         */
        public float getValue()
        {
            return value;
        }

        /*
         * @see org.apache.lucene.search.Weight#normalize(float)
         */
        public void normalize(float queryNorm)
        {
            this.queryNorm = queryNorm;
            // normalise the query weight, then apply the idf for the document
            queryWeight *= queryNorm;
            value = queryWeight * idf;
        }

        /*
         * @see org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.IndexReader)
         */
        public Scorer scorer(IndexReader reader) throws IOException
        {
            return PathScorer.createPathScorer(getSimilarity(searcher), PathQuery.this, reader, this, dictionarySertvice, repeats);
        }

        /*
         * @see org.apache.lucene.search.Weight#sumOfSquaredWeights()
         */
        public float sumOfSquaredWeights() throws IOException
        {
            // compute the idf over the path terms, then the query weight, and square it
            idf = getSimilarity(searcher).idf(getTerms(), searcher);
            queryWeight = idf * getBoost();
            return queryWeight * queryWeight;
        }

        /*
         * Collects a term in the path field for every path position that has term text.
         */
        private ArrayList<Term> getTerms()
        {
            ArrayList<Term> terms = new ArrayList<Term>(pathStructuredFieldPositions.size());
            for (StructuredFieldPosition sfp : pathStructuredFieldPositions)
            {
                if (sfp.getTermText() == null)
                {
                    continue;
                }
                terms.add(new Term(pathField, sfp.getTermText()));
            }
            return terms;
        }
    }

    /**
     * Strips trailing steps for as long as the last position links by self. Each round steps back twice.
     */
    public void removeDescendantAndSelf()
    {
        for (StructuredFieldPosition tail = getLast(); (tail != null) && tail.linkSelf(); tail = getLast())
        {
            removeLast();
            removeLast();
        }
    }

    /*
     * The last position of the final step, or null when the final step is empty.
     */
    private StructuredFieldPosition getLast()
    {
        int count = qNameStructuredFieldPositions.size();
        if (count > 0)
        {
            return qNameStructuredFieldPositions.get(count - 1);
        }
        return null;
    }

    /*
     * Drops the final step and makes the last step of the path part (if any) the new final step.
     */
    private void removeLast()
    {
        qNameStructuredFieldPositions.clear();
        for (int moved = 0; moved < unitSize; moved++)
        {
            if (!pathStructuredFieldPositions.isEmpty())
            {
                int lastIndex = pathStructuredFieldPositions.size() - 1;
                qNameStructuredFieldPositions.add(0, pathStructuredFieldPositions.remove(lastIndex));
            }
        }
    }

    /**
     * @return true if the query has no final step
     */
    public boolean isEmpty()
    {
        return qNameStructuredFieldPositions.size() == 0;
    }

    /**
     * @return the live list of positions for the leading steps
     */
    public List<StructuredFieldPosition> getPathStructuredFieldPositions()
    {
        return pathStructuredFieldPositions;
    }

    /**
     * @return the live list of positions for the final step
     */
    public List<StructuredFieldPosition> getQNameStructuredFieldPositions()
    {
        return qNameStructuredFieldPositions;
    }

    public void setRepeats(boolean repeats)
    {
        this.repeats = repeats;
    }
}

View File

@@ -1,208 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.alfresco.repo.search.impl.lucene.index.CachingIndexReader;
import org.alfresco.repo.search.impl.lucene.query.LeafScorer.Counter;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;
public class PathScorer extends Scorer
{
Scorer scorer;
PathScorer(Similarity similarity, Scorer scorer)
{
super(similarity);
this.scorer = scorer;
}
public static PathScorer createPathScorer(Similarity similarity, PathQuery pathQuery, IndexReader reader, Weight weight, DictionaryService dictionarySertvice, boolean repeat) throws IOException
{
Scorer selfScorer = null;
HashMap<String, Counter> selfIds = null;
StructuredFieldPosition last = null;
if(pathQuery.getQNameStructuredFieldPositions().size() > 0)
{
last = pathQuery.getQNameStructuredFieldPositions().get(pathQuery.getQNameStructuredFieldPositions().size() - 1);
}
if ((last != null) && last.linkSelf())
{
PathQuery selfQuery = new PathQuery(dictionarySertvice);
selfQuery.setQuery(pathQuery.getPathStructuredFieldPositions(), pathQuery.getQNameStructuredFieldPositions());
selfQuery.removeDescendantAndSelf();
if (!selfQuery.isEmpty())
{
selfIds = new HashMap<String, Counter>();
selfScorer = PathScorer.createPathScorer(similarity, selfQuery, reader, weight, dictionarySertvice, repeat);
selfIds.clear();
while (selfScorer.next())
{
int doc = selfScorer.doc();
Document document = reader.document(doc);
Field id = document.getField("ID");
Counter counter = selfIds.get(id.stringValue());
if (counter == null)
{
counter = new Counter();
selfIds.put(id.stringValue(), counter);
}
counter.count++;
}
}
}
if ((pathQuery.getPathStructuredFieldPositions().size() + pathQuery.getQNameStructuredFieldPositions().size()) == 0)
{
ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2);
answer.add(new SelfAxisStructuredFieldPosition());
answer.add(new SelfAxisStructuredFieldPosition());
pathQuery.appendQuery(answer);
}
for (StructuredFieldPosition sfp : pathQuery.getPathStructuredFieldPositions())
{
if (sfp.getTermText() != null)
{
TermPositions p = reader.termPositions(new Term(pathQuery.getPathField(), sfp.getTermText()));
if (p == null)
return null;
CachingTermPositions ctp = new CachingTermPositions(p);
sfp.setCachingTermPositions(ctp);
}
}
for (StructuredFieldPosition sfp : pathQuery.getQNameStructuredFieldPositions())
{
if (sfp.getTermText() != null)
{
TermPositions p = reader.termPositions(new Term(pathQuery.getQnameField(), sfp.getTermText()));
if (p == null)
return null;
CachingTermPositions ctp = new CachingTermPositions(p);
sfp.setCachingTermPositions(ctp);
}
}
TermPositions rootContainerPositions = null;
if (pathQuery.getPathRootTerm() != null)
{
rootContainerPositions = reader.termPositions(pathQuery.getPathRootTerm());
}
TermPositions rootLeafPositions = null;
if (pathQuery.getQNameRootTerm() != null)
{
rootLeafPositions = reader.termPositions(pathQuery.getQNameRootTerm());
}
TermPositions tp = reader.termPositions();
ContainerScorer cs = null;
TermPositions level0 = null;
TermDocs nodeDocs;
if (reader instanceof CachingIndexReader)
{
CachingIndexReader cachingIndexReader = (CachingIndexReader) reader;
nodeDocs = cachingIndexReader.getNodeDocs();
}
else
{
nodeDocs = reader.termDocs(new Term("ISNODE", "T"));
}
// StructuredFieldPosition[] test =
// (StructuredFieldPosition[])structuredFieldPositions.toArray(new
// StructuredFieldPosition[]{});
if (pathQuery.getPathStructuredFieldPositions().size() > 0)
{
TermPositions containerPositions = reader.termPositions(new Term("ISCONTAINER", "T"));
cs = new ContainerScorer(weight, rootContainerPositions, (StructuredFieldPosition[]) pathQuery.getPathStructuredFieldPositions().toArray(new StructuredFieldPosition[] {}),
containerPositions, similarity, reader.norms(pathQuery.getPathField()));
}
else
{
level0 = reader.termPositions(new Term("ISROOT", "T"));
}
if((cs == null) &&
(pathQuery.getQNameStructuredFieldPositions().get(pathQuery.getQNameStructuredFieldPositions().size()-1)).linkSelf())
{
nodeDocs = reader.termDocs(new Term("ISROOT", "T"));
}
LeafScorer ls = new LeafScorer(weight, rootLeafPositions, level0, cs, (StructuredFieldPosition[]) pathQuery.getQNameStructuredFieldPositions().toArray(new StructuredFieldPosition[] {}), nodeDocs,
selfIds, reader, similarity, reader.norms(pathQuery.getQnameField()), dictionarySertvice, repeat, tp);
return new PathScorer(similarity, ls);
}
/**
 * Delegates to {@code next()} on the {@code scorer} field.
 *
 * @return the value returned by {@code scorer.next()}
 * @throws IOException
 *             if the delegate throws it
 */
@Override
public boolean next() throws IOException
{
    final boolean delegateResult = scorer.next();
    return delegateResult;
}
/**
 * Delegates to {@code doc()} on the {@code scorer} field.
 *
 * @return the value returned by {@code scorer.doc()}
 */
@Override
public int doc()
{
    final int delegateDoc = scorer.doc();
    return delegateDoc;
}
/**
 * Delegates to {@code score()} on the {@code scorer} field.
 *
 * @return the value returned by {@code scorer.score()}
 * @throws IOException
 *             if the delegate throws it
 */
@Override
public float score() throws IOException
{
    final float delegateScore = scorer.score();
    return delegateScore;
}
/**
 * Delegates to {@code skipTo(int)} on the {@code scorer} field.
 *
 * @param position
 *            passed through unchanged to the delegate
 * @return the value returned by {@code scorer.skipTo(position)}
 * @throws IOException
 *             if the delegate throws it
 */
@Override
public boolean skipTo(int position) throws IOException
{
    final boolean delegateResult = scorer.skipTo(position);
    return delegateResult;
}
/**
 * Delegates to {@code explain(int)} on the {@code scorer} field.
 *
 * @param position
 *            passed through unchanged to the delegate
 * @return the explanation produced by {@code scorer.explain(position)}
 * @throws IOException
 *             if the delegate throws it
 */
@Override
public Explanation explain(int position) throws IOException
{
    final Explanation delegateExplanation = scorer.explain(position);
    return delegateExplanation;
}
}

View File

@@ -1,96 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
/**
 * Search for a term relative to the last one found.
 * <p>
 * The element matches when its term occurs exactly {@code relativePosition}
 * places after the current offset; when no caching term positions are set
 * it matches unconditionally at {@code offset + 1}.
 *
 * @author andyh
 */
public class RelativeStructuredFieldPosition extends AbstractStructuredFieldPosition
{
    /** Distance from the current offset at which the term must occur; both constructors leave it at 1. */
    int relativePosition = 1;

    /**
     * Creates a position for the given term text.
     *
     * @param termText
     *            the term to look for; the wildcard {@code "*"} is handed to
     *            the superclass as {@code null}
     */
    public RelativeStructuredFieldPosition(String termText)
    {
        super(wildcardToNull(termText), true, false);
    }

    /**
     * Creates a position without any term text.
     */
    public RelativeStructuredFieldPosition()
    {
        super(null, false, false);
    }

    /**
     * Maps the wildcard {@code "*"} to {@code null} and leaves every other term text alone.
     */
    private static String wildcardToNull(String termText)
    {
        if (termText.equals("*"))
        {
            return null;
        }
        return termText;
    }

    /**
     * Tests whether this element matches directly after the given offset.
     *
     * @param start
     *            subtracted from each real term position before comparing
     * @param end
     *            upper bound for real term positions, or -1 for no bound
     * @param offset
     *            the current offset in the path
     * @return the matched (start-adjusted) position, {@code offset + 1} when
     *         there are no caching term positions, or -1 when nothing matches
     * @throws IOException
     *             if reading the term positions fails
     * @see StructuredFieldPosition#matches(int, int, int)
     */
    public int matches(int start, int end, int offset) throws IOException
    {
        if (getCachingTermPositions() == null)
        {
            // Doing "*": no term to check, so the next position always matches
            return offset + 1;
        }

        // Doing "termText": walk the cached positions from the beginning
        getCachingTermPositions().reset();
        final int wanted = offset + relativePosition;
        int remaining = getCachingTermPositions().freq();
        while (remaining-- > 0)
        {
            final int absolute = getCachingTermPositions().nextPosition();
            if (end != -1 && absolute > end)
            {
                // ran past the end of the allowed range
                return -1;
            }
            final int relative = absolute - start;
            if (relative >= wanted)
            {
                // either an exact hit, or we have already overshot the slot
                return (relative == wanted) ? relative : -1;
            }
        }
        return -1;
    }

    /**
     * @return the fixed description {@code "Relative Named child"}
     */
    public String getDescription()
    {
        return "Relative Named child";
    }
}

View File

@@ -1,54 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
/**
 * Structured field position described as the "Self Axis": it carries no term
 * text, reports that it links to self, is never terminal, and matches at the
 * offset it is given.
 */
public class SelfAxisStructuredFieldPosition extends AbstractStructuredFieldPosition
{
    /**
     * Creates the position with no term text.
     */
    public SelfAxisStructuredFieldPosition()
    {
        super(null, true, false);
    }

    /**
     * @return always {@code false}
     */
    public boolean isTerminal()
    {
        return false;
    }

    /**
     * @return always {@code true}
     */
    public boolean linkSelf()
    {
        return true;
    }

    /**
     * @return the fixed description {@code "Self Axis"}
     */
    public String getDescription()
    {
        return "Self Axis";
    }

    /**
     * Always matches without consuming a position.
     *
     * @param start
     *            unused
     * @param end
     *            unused
     * @param offset
     *            the current offset in the path
     * @return {@code offset}, unchanged
     * @throws IOException
     *             declared by the signature; this implementation does no I/O
     */
    public int matches(int start, int end, int offset) throws IOException
    {
        return offset;
    }
}

View File

@@ -1,116 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import java.io.IOException;
/**
 * Elements used to test against path and QName.
 *
 * @author andyh
 */
public interface StructuredFieldPosition
{
/**
 * Does this element match?
 *
 * @param start -
 *            the start position of the path's terms
 * @param end -
 *            the end position of the path's terms
 * @param offset -
 *            the current offset in the path
 * @return returns the next match position (usually offset + 1) or -1 if it
 *         does not match.
 * @throws IOException
 */
public int matches(int start, int end, int offset) throws IOException;
/**
 * If this position is last in the chain and it is terminal it will ensure
 * it is an exact match for the length of the chain found. If false, it will
 * effectively allow prefix matches for the likes of descendant-and-below
 * style queries.
 *
 * @return
 */
public boolean isTerminal();
/**
 * Is this an absolute element; that is, it knows its exact position.
 *
 * @return
 */
public boolean isAbsolute();
/**
 * This element only knows its position relative to the previous element.
 *
 * @return
 */
public boolean isRelative();
/**
 * Get the text to search for in the term query. This may be null if it
 * should not have a term query.
 *
 * @return
 */
public String getTermText();
/**
 * If absolute return the position. If relative we could compute the
 * position knowing the previous term unless this element is preceded by a
 * descendant-and-below style element.
 *
 * @return
 */
public int getPosition();
/**
 * A reference to the caching term positions this element uses. This may be
 * null which indicates all terms match, in that case there is no action
 * against the index.
 *
 * @param tps
 */
public void setCachingTermPositions(CachingTermPositions tps);
/**
 * Accessor counterpart of {@link #setCachingTermPositions(CachingTermPositions)}.
 * NOTE(review): presumably returns whatever was last set, possibly null -
 * confirm against the implementations.
 */
public CachingTermPositions getCachingTermPositions();
/**
 * Normally paths would require only parent chaining. For some it is parent
 * and child chaining.
 *
 * @return
 */
public boolean linkSelf();
// NOTE(review): the five methods below are undocumented in the original;
// their names suggest they describe linking/axis capabilities of the
// element, but the exact contract is not visible here - confirm against
// the implementations before relying on them.
public boolean linkParent();
public boolean allowslinkingByParent();
public boolean allowsLinkingBySelf();
public boolean isDescendant();
public boolean matchesAll();
}

View File

@@ -1,57 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.search.impl.lucene.query;
import org.apache.lucene.index.Term;
/**
 * Simple holder that pairs a Lucene {@link Term} with a
 * {@link StructuredFieldPosition}.
 *
 * @author andyh
 */
public class StructuredFieldTerm
{
    /** The position element supplied at construction. */
    private final StructuredFieldPosition position;

    /** The term supplied at construction. */
    private final Term indexTerm;

    /**
     * Stores both references as given; neither is validated or copied.
     *
     * @param term
     *            the term to hold
     * @param sfp
     *            the structured field position to hold
     */
    public StructuredFieldTerm(Term term, StructuredFieldPosition sfp)
    {
        this.position = sfp;
        this.indexTerm = term;
    }

    /**
     * @return Returns the term.
     */
    public Term getTerm()
    {
        return indexTerm;
    }

    /**
     * @return Returns the sfp.
     */
    public StructuredFieldPosition getSfp()
    {
        return position;
    }
}

View File

@@ -212,7 +212,7 @@ public class AlfrescoFunctionEvaluationContext implements FunctionEvaluationCont
// find best field match // find best field match
HashSet<String> allowableLocales = new HashSet<String>(); HashSet<String> allowableLocales = new HashSet<String>();
MLAnalysisMode analysisMode = lqp.getConfig().getDefaultMLSearchAnalysisMode(); MLAnalysisMode analysisMode = lqp.getDefaultSearchMLAnalysisMode();
for (Locale l : MLAnalysisMode.getLocales(analysisMode, sortLocale, false)) for (Locale l : MLAnalysisMode.getLocales(analysisMode, sortLocale, false))
{ {
allowableLocales.add(l.toString()); allowableLocales.add(l.toString());

View File

@@ -57,7 +57,7 @@ public class LuceneQueryBuilderContext
lqp.setNamespacePrefixResolver(namespacePrefixResolver); lqp.setNamespacePrefixResolver(namespacePrefixResolver);
lqp.setTenantService(tenantService); lqp.setTenantService(tenantService);
lqp.setSearchParameters(searchParameters); lqp.setSearchParameters(searchParameters);
lqp.setLuceneConfig(config); lqp.setDefaultSearchMLAnalysisMode(config.getDefaultMLSearchAnalysisMode());
lqp.setIndexReader(indexReader); lqp.setIndexReader(indexReader);
lqp.setAllowLeadingWildcard(true); lqp.setAllowLeadingWildcard(true);
this.namespacePrefixResolver = namespacePrefixResolver; this.namespacePrefixResolver = namespacePrefixResolver;

View File

@@ -1,323 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.util;
import org.alfresco.repo.search.impl.lucene.LuceneQueryParser;
import org.apache.lucene.queryParser.QueryParser;
/**
 * Helper class to provide conversions between different search languages.
 * <p>
 * Each language is described by a {@link LanguageDefinition}: its escape
 * character, its multi-character and single-character wildcard tokens and the
 * set of characters that must be escaped to be taken literally.
 *
 * @author Derek Hulley
 */
public class SearchLanguageConversion
{
    /**
     * SQL like query language summary:
     * <ul>
     * <li>Escape: \</li>
     * <li>Single char search: _</li>
     * <li>Multiple char search: %</li>
     * <li>Reserved: \%_[]</li>
     * </ul>
     */
    public static LanguageDefinition DEF_SQL_LIKE = new SimpleLanguageDef('\\', "%", "_", "\\%_[]");

    /**
     * XPath like query language summary:
     * <ul>
     * <li>Escape: \</li>
     * <li>Single char search: _</li>
     * <li>Multiple char search: %</li>
     * <li>Reserved: \%_[]</li>
     * </ul>
     */
    public static LanguageDefinition DEF_XPATH_LIKE = new SimpleLanguageDef('\\', "%", "_", "\\%_[]");

    /**
     * Regular expression query language summary:
     * <ul>
     * <li>Escape: \</li>
     * <li>Single char search: .</li>
     * <li>Multiple char search: .*</li>
     * <li>Reserved: \*.+?^$(){}|</li>
     * </ul>
     */
    public static LanguageDefinition DEF_REGEX = new SimpleLanguageDef('\\', ".*", ".", "\\*.+?^$(){}|");

    /**
     * Lucene syntax summary: {@link QueryParser#escape(String) Lucene Query Parser}
     */
    public static LanguageDefinition DEF_LUCENE = new LuceneLanguageDef(true);

    /**
     * Lucene syntax with the same escape character and wildcards as
     * {@link #DEF_LUCENE} but with an empty reserved set, so no character is
     * ever escaped on output.
     */
    public static LanguageDefinition DEF_LUCENE_INTERNAL = new LuceneLanguageDef(false);

    /**
     * CIFS name path query language summary:
     * <ul>
     * <li>Escape: \ (but not used)</li>
     * <li>Single char search: ?</li>
     * <li>Multiple char search: *</li>
     * <li>Reserved: "*\<>?/:|£%&+;</li>
     * </ul>
     */
    public static LanguageDefinition DEF_CIFS = new SimpleLanguageDef('\\', "*", "?", "\"*\\<>?/:|£%&+;");

    /**
     * Escape a string according to the <b>XPath</b> like function syntax.
     *
     * @param str
     *            the string to escape
     * @return Returns the escaped string
     */
    public static String escapeForXPathLike(String str)
    {
        return escape(DEF_XPATH_LIKE, str);
    }

    /**
     * Escape a string according to the <b>regex</b> language syntax.
     *
     * @param str
     *            the string to escape
     * @return Returns the escaped string
     */
    public static String escapeForRegex(String str)
    {
        return escape(DEF_REGEX, str);
    }

    /**
     * Escape a string according to the <b>Lucene</b> query syntax.
     *
     * @param str
     *            the string to escape
     * @return Returns the escaped string
     */
    public static String escapeForLucene(String str)
    {
        return escape(DEF_LUCENE, str);
    }

    /**
     * Generic escaping using the language definition: every character that the
     * definition reports as reserved is prefixed with the definition's escape
     * character; all other characters are copied unchanged.
     */
    private static String escape(LanguageDefinition def, String str)
    {
        StringBuilder sb = new StringBuilder(str.length() * 2);
        for (int i = 0; i < str.length(); i++)
        {
            char ch = str.charAt(i);
            if (def.isReserved(ch))
            {
                sb.append(def.escapeChar);
            }
            sb.append(ch);
        }
        return sb.toString();
    }

    /**
     * Convert an <b>xpath</b> like function clause into a <b>regex</b> query.
     *
     * @param xpathLikeClause
     * @return Returns a valid regular expression that is equivalent to the given <b>xpath</b> like clause.
     */
    public static String convertXPathLikeToRegex(String xpathLikeClause)
    {
        return "(?s)" + convert(DEF_XPATH_LIKE, DEF_REGEX, xpathLikeClause);
    }

    /**
     * Convert an <b>xpath</b> like function clause into a <b>Lucene</b> query.
     *
     * @param xpathLikeClause
     * @return Returns a valid <b>Lucene</b> expression that is equivalent to the given <b>xpath</b> like clause.
     */
    public static String convertXPathLikeToLucene(String xpathLikeClause)
    {
        return convert(DEF_XPATH_LIKE, DEF_LUCENE, xpathLikeClause);
    }

    /**
     * Convert a <b>sql</b> like function clause into a <b>Lucene</b> query.
     * Uses {@link #DEF_LUCENE_INTERNAL}, so literal characters are not escaped.
     *
     * @param sqlLikeClause
     * @return Returns a valid <b>Lucene</b> expression that is equivalent to the given <b>sql</b> like clause.
     */
    public static String convertSQLLikeToLucene(String sqlLikeClause)
    {
        return convert(DEF_SQL_LIKE, DEF_LUCENE_INTERNAL, sqlLikeClause);
    }

    /**
     * Convert a <b>sql</b> like function clause into a <b>regex</b> query.
     *
     * @param sqlLikeClause
     * @return Returns a valid regular expression that is equivalent to the given <b>sql</b> like clause.
     */
    public static String convertSQLLikeToRegex(String sqlLikeClause)
    {
        return "(?s)" + convert(DEF_SQL_LIKE, DEF_REGEX, sqlLikeClause);
    }

    /**
     * Convert a <b>CIFS</b> name path into the equivalent <b>Lucene</b> query.
     *
     * @param cifsNamePath
     *            the CIFS named path
     * @return Returns a valid <b>Lucene</b> expression that is equivalent to the given CIFS name path
     */
    public static String convertCifsToLucene(String cifsNamePath)
    {
        return convert(DEF_CIFS, DEF_LUCENE, cifsNamePath);
    }

    /**
     * Translate a query from one language definition to another.
     * <p>
     * The query is scanned left to right:
     * <ul>
     * <li>a character preceded by the source escape character is emitted
     * literally, escaped for the target language if the target reserves it;</li>
     * <li>the source multi-character wildcard is replaced by the target's;</li>
     * <li>the source single-character wildcard is replaced by the target's;</li>
     * <li>any other character is escaped if the target reserves it, otherwise
     * copied unchanged.</li>
     * </ul>
     * A source escape character at the very end of the query has nothing to
     * escape and is dropped.
     *
     * @param from
     *            the language the query is written in
     * @param to
     *            the language to produce
     * @param query
     *            the query to translate
     * @return Returns the query expressed in the target language
     */
    public static String convert(LanguageDefinition from, LanguageDefinition to, String query)
    {
        char[] chars = query.toCharArray();
        StringBuilder sb = new StringBuilder(chars.length * 2);
        boolean escaping = false;
        for (int i = 0; i < chars.length; i++)
        {
            if (escaping) // if we are currently escaping, just escape the current character
            {
                if (to.isReserved(chars[i]))
                {
                    sb.append(to.escapeChar); // the to format escape char
                }
                sb.append(chars[i]); // the current char
                escaping = false;
            }
            else if (chars[i] == from.escapeChar) // not escaping and have escape char
            {
                escaping = true;
            }
            else if (query.startsWith(from.multiCharWildcard, i)) // not escaping but have multi-char wildcard
            {
                // translate the wildcard and step over ALL of its characters;
                // without this a token such as the regex ".*" would have its
                // trailing '*' processed a second time as an ordinary character
                sb.append(to.multiCharWildcard);
                i += trailingLength(from.multiCharWildcard);
            }
            else if (query.startsWith(from.singleCharWildcard, i)) // have single-char wildcard
            {
                // translate the wildcard, consuming the whole source token
                sb.append(to.singleCharWildcard);
                i += trailingLength(from.singleCharWildcard);
            }
            else if (to.isReserved(chars[i])) // reserved character
            {
                sb.append(to.escapeChar).append(chars[i]);
            }
            else
            // just a normal char in both
            {
                sb.append(chars[i]);
            }
        }
        return sb.toString();
    }

    /**
     * Number of characters of a matched token beyond the one the loop counter
     * already accounts for. Never negative, so an empty token cannot move the
     * scan backwards.
     */
    private static int trailingLength(String token)
    {
        return Math.max(token.length() - 1, 0);
    }

    /**
     * Simple store of special characters for a given query language
     */
    public static abstract class LanguageDefinition
    {
        /** Character that makes the following character literal. */
        public final char escapeChar;

        /** Token matching any run of characters. */
        public final String multiCharWildcard;

        /** Token matching exactly one character. */
        public final String singleCharWildcard;

        public LanguageDefinition(char escapeChar, String multiCharWildcard, String singleCharWildcard)
        {
            this.escapeChar = escapeChar;
            this.multiCharWildcard = multiCharWildcard;
            this.singleCharWildcard = singleCharWildcard;
        }

        /**
         * @param ch
         *            the character to test
         * @return Returns true if the character must be escaped to be taken literally
         */
        public abstract boolean isReserved(char ch);
    }

    /**
     * Language definition whose reserved characters are given as an explicit string.
     */
    private static class SimpleLanguageDef extends LanguageDefinition
    {
        private final String reserved;

        public SimpleLanguageDef(char escapeChar, String multiCharWildcard, String singleCharWildcard, String reserved)
        {
            super(escapeChar, multiCharWildcard, singleCharWildcard);
            this.reserved = reserved;
        }

        @Override
        public boolean isReserved(char ch)
        {
            return (reserved.indexOf(ch) > -1);
        }
    }

    /**
     * Lucene language definition: escape <tt>\</tt>, wildcards <tt>*</tt> and
     * <tt>?</tt>. The reserved set is either discovered from the query parser
     * or left empty.
     */
    private static class LuceneLanguageDef extends LanguageDefinition
    {
        private final String reserved;

        /**
         * @param reserve
         *            true to discover the reserved characters from the Lucene
         *            query parser, false to reserve nothing
         */
        public LuceneLanguageDef(boolean reserve)
        {
            super('\\', "*", "?");
            this.reserved = reserve ? discoverReservedChars() : "";
        }

        /**
         * Discovers all the reserved chars by asking the query parser to
         * escape each of the first 256 characters and keeping those that come
         * back changed.
         */
        private static String discoverReservedChars()
        {
            StringBuilder sb = new StringBuilder(20);
            for (char ch = 0; ch < 256; ch++)
            {
                String unescaped = String.valueOf(ch);
                // check it
                String escaped = LuceneQueryParser.escape(unescaped);
                if (!escaped.equals(unescaped))
                {
                    // it was escaped
                    sb.append(ch);
                }
            }
            return sb.toString();
        }

        @Override
        public boolean isReserved(char ch)
        {
            return (reserved.indexOf(ch) > -1);
        }
    }
}

View File

@@ -1,122 +0,0 @@
/*
* Copyright (C) 2005-2010 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.util;
import junit.framework.TestCase;
/**
 * Exercises the escaping and conversion helpers of
 * {@link org.alfresco.util.SearchLanguageConversion} against fixed inputs.
 *
 * @see org.alfresco.util.SearchLanguageConversion
 *
 * @author Derek Hulley
 */
public class SearchLanguageConversionTest extends TestCase
{
    /**
     * A string with a whole lot of badness to stress test with
     */
    private static final String BAD_STRING =
            "\\ | ! \" £ " +
            "$ % ^ & * ( " +
            ") _ { } [ ] " +
            "@ # ~ ' : ; " +
            ", . < > + ? " +
            "/ \\\\ \\* \\? \\_";

    /** Escaping for the XPath like syntax. */
    public void testEscapeXPathLike()
    {
        final String actual = SearchLanguageConversion.escapeForXPathLike(BAD_STRING);
        final String expected =
                "\\\\ | ! \" £ " +
                "$ \\% ^ & * ( " +
                ") \\_ { } \\[ \\] " +
                "@ # ~ ' : ; " +
                ", . < > + ? " +
                "/ \\\\\\\\ \\\\* \\\\? \\\\\\_";
        assertEquals("Escaping for xpath failed", expected, actual);
    }

    /** Escaping for the regex syntax. */
    public void testEscapeRegex()
    {
        final String actual = SearchLanguageConversion.escapeForRegex(BAD_STRING);
        final String expected =
                "\\\\ \\| ! \" £ " +
                "\\$ % \\^ & \\* \\( " +
                "\\) _ \\{ \\} [ ] " +
                "@ # ~ ' : ; " +
                ", \\. < > \\+ \\? " +
                "/ \\\\\\\\ \\\\\\* \\\\\\? \\\\_";
        assertEquals("Escaping for regex failed", expected, actual);
    }

    /** Escaping for the Lucene syntax. */
    public void testEscapeLucene()
    {
        final String actual = SearchLanguageConversion.escapeForLucene(BAD_STRING);
        final String expected =
                "\\\\ \\| \\! \\\" £ " +
                "$ % \\^ \\& \\* \\( " +
                "\\) _ \\{ \\} \\[ \\] " +
                "@ # \\~ ' \\: ; " +
                ", . < > \\+ \\? " +
                "/ \\\\\\\\ \\\\\\* \\\\\\? \\\\_";
        assertEquals("Escaping for Lucene failed", expected, actual);
    }

    /** XPath like clause to regex. */
    public void testConvertXPathLikeToRegex()
    {
        final String actual = SearchLanguageConversion.convertXPathLikeToRegex(BAD_STRING);
        final String expected =
                "(?s) \\| ! \" £ " +
                "\\$ .* \\^ & \\* \\( " +
                "\\) . \\{ \\} [ ] " +
                "@ # ~ ' : ; " +
                ", \\. < > \\+ \\? " +
                "/ \\\\ \\* \\? _";
        assertEquals("XPath like to regex failed", expected, actual);
    }

    /** XPath like clause to Lucene. */
    public void testConvertXPathLikeToLucene()
    {
        final String actual = SearchLanguageConversion.convertXPathLikeToLucene(BAD_STRING);
        final String expected =
                " \\| \\! \\\" £ " +
                "$ * \\^ \\& \\* \\( " +
                "\\) ? \\{ \\} \\[ \\] " +
                "@ # \\~ ' \\: ; " +
                ", . < > \\+ \\? " +
                "/ \\\\ \\* \\? _";
        assertEquals("XPath like to Lucene failed", expected, actual);
    }

    /** Generic conversion from the SQL like definition to the Lucene definition. */
    public void testSqlToLucene()
    {
        final String source = "AB%_*?\\%\\_";
        final String expected = "AB*?\\*\\?%_";
        assertEquals(
                expected,
                SearchLanguageConversion.convert(SearchLanguageConversion.DEF_SQL_LIKE, SearchLanguageConversion.DEF_LUCENE, source));
    }

    /** Generic conversion from the Lucene definition to the regex definition. */
    public void testLuceneToRegexp()
    {
        final String source = "AB*?\\*\\?.*.";
        final String expected = "AB.*.\\*\\?\\..*\\.";
        assertEquals(
                expected,
                SearchLanguageConversion.convert(SearchLanguageConversion.DEF_LUCENE, SearchLanguageConversion.DEF_REGEX, source));
    }
}