Updated Query parser

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4519 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2006-12-05 11:20:22 +00:00
parent e8b37eb880
commit fd351b9411
11 changed files with 1235 additions and 864 deletions

View File

@@ -25,20 +25,6 @@ public interface CharStream {
*/ */
char readChar() throws java.io.IOException; char readChar() throws java.io.IOException;
/**
* Returns the column position of the character last read.
* @deprecated
* @see #getEndColumn
*/
int getColumn();
/**
* Returns the line number of the character last read.
* @deprecated
* @see #getEndLine
*/
int getLine();
/** /**
* Returns the column number of the last character for current token (being * Returns the column number of the last character for current token (being
* matched after the last call to BeginTOken). * matched after the last call to BeginTOken).

View File

@@ -17,8 +17,7 @@ package org.alfresco.repo.search.impl.lucene;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException; import java.io.*;
import java.io.Reader;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the * this does not do line-number counting, but instead keeps track of the

View File

@@ -63,7 +63,7 @@ public class LuceneQueryParser extends QueryParser
* if the parsing fails * if the parsing fails
*/ */
static public Query parse(String query, String field, Analyzer analyzer, static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, int defaultOperator) NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, Operator defaultOperator)
throws ParseException throws ParseException
{ {
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
@@ -71,7 +71,7 @@ public class LuceneQueryParser extends QueryParser
s_logger.debug("Using Alfresco Lucene Query Parser for query: " + query); s_logger.debug("Using Alfresco Lucene Query Parser for query: " + query);
} }
LuceneQueryParser parser = new LuceneQueryParser(field, analyzer); LuceneQueryParser parser = new LuceneQueryParser(field, analyzer);
parser.setOperator(defaultOperator); parser.setDefaultOperator(defaultOperator);
parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService); parser.setDictionaryService(dictionaryService);
return parser.parse(query); return parser.parse(query);

View File

@@ -32,6 +32,7 @@ import org.alfresco.repo.search.Indexer;
import org.alfresco.repo.search.QueryRegisterComponent; import org.alfresco.repo.search.QueryRegisterComponent;
import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.NodeSearcher; import org.alfresco.repo.search.impl.NodeSearcher;
import org.alfresco.repo.search.impl.lucene.QueryParser.Operator;
import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.InvalidNodeRefException; import org.alfresco.service.cmr.repository.InvalidNodeRefException;
import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeRef;
@@ -203,14 +204,14 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
try try
{ {
int defaultOperator; Operator defaultOperator;
if (searchParameters.getDefaultOperator() == SearchParameters.AND) if (searchParameters.getDefaultOperator() == SearchParameters.AND)
{ {
defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_AND; defaultOperator = LuceneQueryParser.AND_OPERATOR;
} }
else else
{ {
defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_OR; defaultOperator = LuceneQueryParser.OR_OPERATOR;
} }
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(

View File

@@ -0,0 +1,268 @@
package org.alfresco.repo.search.impl.lucene;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import java.util.Vector;
/**
* A QueryParser which constructs queries to search multiple fields.
*
* @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>, Daniel Naber
* @version $Revision: 406088 $
*/
public class MultiFieldQueryParser extends QueryParser
{

  /** Fields that are searched when a query clause does not name a field itself. */
  private String[] fields;

  /**
   * Creates a MultiFieldQueryParser.
   *
   * <p>It will, when parse(String query)
   * is called, construct a query like this (assuming the query consists of
   * two terms and you specify the two fields <code>title</code> and <code>body</code>):</p>
   *
   * <code>
   * (title:term1 body:term1) (title:term2 body:term2)
   * </code>
   *
   * <p>When setDefaultOperator(AND_OPERATOR) is set, the result will be:</p>
   *
   * <code>
   * +(title:term1 body:term1) +(title:term2 body:term2)
   * </code>
   *
   * <p>In other words, all the query's terms must appear, but it doesn't matter in
   * what fields they appear.</p>
   *
   * @param fields   the fields to expand un-fielded clauses over
   * @param analyzer used to find terms in the query text
   */
  public MultiFieldQueryParser(String[] fields, Analyzer analyzer) {
    // The default field is null on purpose: a null field reaching one of the
    // factory methods below is the signal to expand over all configured fields.
    super(null, analyzer);
    this.fields = fields;
  }

  /**
   * Builds the query for a (possibly quoted) term with an explicit slop.
   *
   * <p>With a <code>null</code> field the term is expanded into an optional clause
   * per configured field; fields for which analysis yields nothing are skipped.
   * With an explicit field the query is built for that field only. In both cases
   * the requested slop is applied to any phrase query that results.</p>
   *
   * @param field     the field named in the query, or <code>null</code> if none was named
   * @param queryText the text to analyze
   * @param slop      the slop to apply to resulting phrase queries
   * @return the resulting query, or <code>null</code> if no field produced a query
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
    if (field == null) {
      Vector clauses = new Vector();
      for (int i = 0; i < fields.length; i++) {
        Query q = super.getFieldQuery(fields[i], queryText);
        if (q != null) {
          applySlop(q, slop);
          clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
        }
      }
      if (clauses.size() == 0) { // happens for stopwords
        return null;
      }
      return getBooleanQuery(clauses, true);
    }
    // Deliberately the two-argument super call: the base three-argument method is
    // documented as delegating to the overridable two-argument one, which this
    // class overrides, so calling it here could recurse without end. The slop is
    // therefore applied locally instead of being dropped.
    Query q = super.getFieldQuery(field, queryText);
    applySlop(q, slop);
    return q;
  }

  /**
   * Builds the query for a term without an explicit slop.
   *
   * <p>With a <code>null</code> field this expands over all configured fields using
   * a slop of 0. With an explicit field it defers to the base implementation
   * unchanged.</p>
   *
   * @param field     the field named in the query, or <code>null</code> if none was named
   * @param queryText the text to analyze
   * @return the resulting query, or <code>null</code> if no field produced a query
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFieldQuery(String field, String queryText) throws ParseException {
    if (field == null) {
      return getFieldQuery(null, queryText, 0);
    }
    return super.getFieldQuery(field, queryText);
  }

  /**
   * Applies the slop to the query if it is a phrase-type query; other query
   * types (and <code>null</code>) are left untouched.
   */
  private static void applySlop(Query q, int slop) {
    if (q instanceof PhraseQuery) {
      ((PhraseQuery) q).setSlop(slop);
    }
    if (q instanceof MultiPhraseQuery) {
      ((MultiPhraseQuery) q).setSlop(slop);
    }
  }

  /**
   * Builds a fuzzy query; with a <code>null</code> field, one optional fuzzy
   * clause is created per configured field.
   *
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
  {
    if (field == null) {
      Vector clauses = new Vector();
      for (int i = 0; i < fields.length; i++) {
        clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity),
            BooleanClause.Occur.SHOULD));
      }
      return getBooleanQuery(clauses, true);
    }
    return super.getFuzzyQuery(field, termStr, minSimilarity);
  }

  /**
   * Builds a prefix query; with a <code>null</code> field, one optional prefix
   * clause is created per configured field.
   *
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getPrefixQuery(String field, String termStr) throws ParseException
  {
    if (field == null) {
      Vector clauses = new Vector();
      for (int i = 0; i < fields.length; i++) {
        clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr),
            BooleanClause.Occur.SHOULD));
      }
      return getBooleanQuery(clauses, true);
    }
    return super.getPrefixQuery(field, termStr);
  }

  /**
   * Builds a wildcard query; with a <code>null</code> field, one optional wildcard
   * clause is created per configured field.
   *
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getWildcardQuery(String field, String termStr) throws ParseException {
    if (field == null) {
      Vector clauses = new Vector();
      for (int i = 0; i < fields.length; i++) {
        clauses.add(new BooleanClause(super.getWildcardQuery(fields[i], termStr),
            BooleanClause.Occur.SHOULD));
      }
      return getBooleanQuery(clauses, true);
    }
    return super.getWildcardQuery(field, termStr);
  }

  /**
   * Builds a range query; with a <code>null</code> field, one optional range
   * clause is created per configured field.
   *
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException {
    if (field == null) {
      Vector clauses = new Vector();
      for (int i = 0; i < fields.length; i++) {
        clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive),
            BooleanClause.Occur.SHOULD));
      }
      return getBooleanQuery(clauses, true);
    }
    return super.getRangeQuery(field, part1, part2, inclusive);
  }

  /**
   * Parses a query which searches on the fields specified.
   * <p>
   * If x fields are specified, this effectively constructs:
   * <pre>
   * <code>
   * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
   * </code>
   * </pre>
   * @param queries Queries strings to parse
   * @param fields Fields to search on
   * @param analyzer Analyzer to use
   * @throws ParseException if query parsing fails
   * @throws IllegalArgumentException if the length of the queries array differs
   *  from the length of the fields array
   */
  public static Query parse(String[] queries, String[] fields,
      Analyzer analyzer) throws ParseException
  {
    if (queries.length != fields.length) {
      throw new IllegalArgumentException("queries.length != fields.length");
    }
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++)
    {
      // A fresh single-field parser per field; each query string is paired
      // with the field at the same index.
      QueryParser qp = new QueryParser(fields[i], analyzer);
      Query q = qp.parse(queries[i]);
      bQuery.add(q, BooleanClause.Occur.SHOULD);
    }
    return bQuery;
  }

  /**
   * Parses a query, searching on the fields specified.
   * Use this if you need to specify certain fields as required,
   * and others as prohibited.
   * <p><pre>
   * Usage:
   * <code>
   * String[] fields = {"filename", "contents", "description"};
   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
   *                BooleanClause.Occur.MUST,
   *                BooleanClause.Occur.MUST_NOT};
   * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
   * </code>
   * </pre>
   *<p>
   * The code above would construct a query:
   * <pre>
   * <code>
   * (filename:query) +(contents:query) -(description:query)
   * </code>
   * </pre>
   *
   * @param query Query string to parse
   * @param fields Fields to search on
   * @param flags Flags describing the fields
   * @param analyzer Analyzer to use
   * @throws ParseException if query parsing fails
   * @throws IllegalArgumentException if the length of the fields array differs
   *  from the length of the flags array
   */
  public static Query parse(String query, String[] fields,
      BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
    if (fields.length != flags.length) {
      throw new IllegalArgumentException("fields.length != flags.length");
    }
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++) {
      // The same query string is parsed once per field and added with that
      // field's occurrence flag.
      QueryParser qp = new QueryParser(fields[i], analyzer);
      Query q = qp.parse(query);
      bQuery.add(q, flags[i]);
    }
    return bQuery;
  }

  /**
   * Parses a query, searching on the fields specified.
   * Use this if you need to specify certain fields as required,
   * and others as prohibited.
   * <p><pre>
   * Usage:
   * <code>
   * String[] query = {"query1", "query2", "query3"};
   * String[] fields = {"filename", "contents", "description"};
   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
   *                BooleanClause.Occur.MUST,
   *                BooleanClause.Occur.MUST_NOT};
   * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
   * </code>
   * </pre>
   *<p>
   * The code above would construct a query:
   * <pre>
   * <code>
   * (filename:query1) +(contents:query2) -(description:query3)
   * </code>
   * </pre>
   *
   * @param queries Queries string to parse
   * @param fields Fields to search on
   * @param flags Flags describing the fields
   * @param analyzer Analyzer to use
   * @throws ParseException if query parsing fails
   * @throws IllegalArgumentException if the length of the queries, fields,
   *  and flags array differ
   */
  public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
      Analyzer analyzer) throws ParseException
  {
    if (!(queries.length == fields.length && queries.length == flags.length)) {
      throw new IllegalArgumentException("queries, fields, and flags array have different length");
    }
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++)
    {
      // Query string, field and occurrence flag are matched up by index.
      QueryParser qp = new QueryParser(fields[i], analyzer);
      Query q = qp.parse(queries[i]);
      bQuery.add(q, flags[i]);
    }
    return bQuery;
  }
}

View File

@@ -1,31 +1,19 @@
/* Generated By:JavaCC: Do not edit this line. QueryParser.java */ /* Generated By:JavaCC: Do not edit this line. QueryParser.java */
package org.alfresco.repo.search.impl.lucene; package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Vector; import java.util.Vector;
import java.io.*;
import org.apache.lucene.analysis.Analyzer; import java.text.*;
import org.apache.lucene.analysis.TokenStream; import java.util.*;
import org.apache.lucene.document.DateField;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.analysis.*;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.document.*;
import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.*;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.util.Parameter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
/** /**
* This class is generated by JavaCC. The only method that clients should need * This class is generated by JavaCC. The most important method is
* to call is <a href="#parse">parse()</a>. * {@link #parse(String)}.
* *
* The syntax for query strings is as follows: * The syntax for query strings is as follows:
* A Query is a series of clauses. * A Query is a series of clauses.
@@ -53,9 +41,21 @@ import org.apache.lucene.search.BooleanClause.Occur;
* *
* <p> * <p>
* Examples of appropriately formatted queries can be found in the <a * Examples of appropriately formatted queries can be found in the <a
* href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>. * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax
* documentation</a>.
* </p> * </p>
* *
* <p>In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. <tt>date:[6/1/2005 TO 6/4/2005]</tt>
* produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note
* that the format of the accepted input depends on {@link #setLocale(Locale) the locale}. This
* feature also assumes that your index uses the {@link DateField} class to store dates.
* If you use a different format (e.g. {@link DateTools}) and you still want QueryParser
* to turn local dates in range queries into valid queries you need to create your own
* query parser that inherits QueryParser and overrides
* {@link #getRangeQuery(String, String, String, boolean)}.</p>
*
* <p>Note that QueryParser is <em>not</em> thread-safe.</p>
*
* @author Brian Goetz * @author Brian Goetz
* @author Peter Halacsy * @author Peter Halacsy
* @author Tatu Saloranta * @author Tatu Saloranta
@@ -71,35 +71,36 @@ public class QueryParser implements QueryParserConstants {
private static final int MOD_NOT = 10; private static final int MOD_NOT = 10;
private static final int MOD_REQ = 11; private static final int MOD_REQ = 11;
public static final int DEFAULT_OPERATOR_OR = 0; // make it possible to call setDefaultOperator() without accessing
public static final int DEFAULT_OPERATOR_AND = 1; // the nested class:
/** Alternative form of QueryParser.Operator.AND */
public static final Operator AND_OPERATOR = Operator.AND;
/** Alternative form of QueryParser.Operator.OR */
public static final Operator OR_OPERATOR = Operator.OR;
/** The actual operator that parser uses to combine query terms */ /** The actual operator that parser uses to combine query terms */
private int operator = DEFAULT_OPERATOR_OR; private Operator operator = OR_OPERATOR;
/** boolean lowercaseExpandedTerms = true;
* Whether terms of wildcard and prefix queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
boolean lowercaseWildcardTerms = true;
Analyzer analyzer; Analyzer analyzer;
String field; String field;
int phraseSlop = 0; int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
Locale locale = Locale.getDefault(); Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}. /** The default operator for parsing queries.
* @param query the query string to be parsed. * Use {@link QueryParser#setDefaultOperator} to change it.
* @param field the default field for query terms.
* @param analyzer used to find terms in the query text.
* @throws ParseException if the parsing fails
*/ */
static public Query parse(String query, String field, Analyzer analyzer) static public final class Operator extends Parameter {
throws ParseException { private Operator(String name) {
QueryParser parser = new QueryParser(field, analyzer); super(name);
return parser.parse(query);
} }
static public final Operator OR = new Operator("OR");
static public final Operator AND = new Operator("AND");
}
/** Constructs a query parser. /** Constructs a query parser.
* @param f the default field for query terms. * @param f the default field for query terms.
@@ -111,8 +112,7 @@ public class QueryParser implements QueryParserConstants {
field = f; field = f;
} }
/** Parses a query string, returning a /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* <a href="lucene.search.Query.html">Query</a>.
* @param query the query string to be parsed. * @param query the query string to be parsed.
* @throws ParseException if the parsing fails * @throws ParseException if the parsing fails
*/ */
@@ -144,18 +144,36 @@ public class QueryParser implements QueryParserConstants {
} }
/** /**
* Get the default minimal similarity for fuzzy queries. * Get the minimal similarity for fuzzy queries.
*/ */
public float getFuzzyMinSim() { public float getFuzzyMinSim() {
return fuzzyMinSim; return fuzzyMinSim;
} }
/** /**
*Set the default minimum similarity for fuzzy queries. * Set the minimum similarity for fuzzy queries.
* Default is 0.5f.
*/ */
public void setFuzzyMinSim(float fuzzyMinSim) { public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim; this.fuzzyMinSim = fuzzyMinSim;
} }
/**
* Get the prefix length for fuzzy queries.
* @return Returns the fuzzyPrefixLength.
*/
public int getFuzzyPrefixLength() {
return fuzzyPrefixLength;
}
/**
* Set the prefix length for fuzzy queries. Default is 0.
* @param fuzzyPrefixLength The fuzzyPrefixLength to set.
*/
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
/** /**
* Sets the default slop for phrases. If zero, then exact phrase matches * Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero. * are required. Default value is zero.
@@ -171,32 +189,43 @@ public class QueryParser implements QueryParserConstants {
return phraseSlop; return phraseSlop;
} }
/**
* Sets the boolean operator of the QueryParser.
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
public void setOperator(int operator) {
this.operator = operator;
}
/** /**
* Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND * Sets the boolean operator of the QueryParser.
* or DEFAULT_OPERATOR_OR. * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/ */
public int getOperator() { public void setDefaultOperator(Operator op) {
this.operator = op;
}
/**
* Gets implicit operator setting, which will be either AND_OPERATOR
* or OR_OPERATOR.
*/
public Operator getDefaultOperator() {
return operator; return operator;
} }
public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
this.lowercaseWildcardTerms = lowercaseWildcardTerms; /**
* Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
} }
public boolean getLowercaseWildcardTerms() {
return lowercaseWildcardTerms; /**
* @see #setLowercaseExpandedTerms(boolean)
*/
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
} }
/** /**
@@ -221,17 +250,17 @@ public class QueryParser implements QueryParserConstants {
if (clauses.size() > 0 && conj == CONJ_AND) { if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.isProhibited()) if (!c.isProhibited())
c.setOccur(Occur.MUST); c.setOccur(BooleanClause.Occur.MUST);
} }
if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional, // If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without // notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would parsed as +a OR b // this modification a OR b would parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.isProhibited()) if (!c.isProhibited())
c.setOccur(Occur.SHOULD); c.setOccur(BooleanClause.Occur.SHOULD);
} }
// We might have been passed a null query; the term might have been // We might have been passed a null query; the term might have been
@@ -239,7 +268,7 @@ public class QueryParser implements QueryParserConstants {
if (q == null) if (q == null)
return; return;
if (operator == DEFAULT_OPERATOR_OR) { if (operator == OR_OPERATOR) {
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both. // introduced by NOT or -; make sure not to set both.
prohibited = (mods == MOD_NOT); prohibited = (mods == MOD_NOT);
@@ -253,31 +282,16 @@ public class QueryParser implements QueryParserConstants {
prohibited = (mods == MOD_NOT); prohibited = (mods == MOD_NOT);
required = (!prohibited && conj != CONJ_OR); required = (!prohibited && conj != CONJ_OR);
} }
Occur occur = Occur.SHOULD; if (required && !prohibited)
if(prohibited) clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
{ else if (!required && !prohibited)
occur = Occur.MUST_NOT; clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
} else if (!required && prohibited)
if(required) clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
{ else
occur = Occur.MUST; throw new RuntimeException("Clause cannot be both required and prohibited");
}
clauses.addElement(new BooleanClause(q, occur));
} }
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText) throws ParseException {
return getFieldQuery(field, queryText);
}
/** /**
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
@@ -286,10 +300,11 @@ public class QueryParser implements QueryParserConstants {
// Use the analyzer to get all the tokens, and then build a TermQuery, // Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count // PhraseQuery, or nothing based on the term count
TokenStream source = analyzer.tokenStream(field, TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
new StringReader(queryText));
Vector v = new Vector(); Vector v = new Vector();
org.apache.lucene.analysis.Token t; org.apache.lucene.analysis.Token t;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
while (true) { while (true) {
try { try {
@@ -300,7 +315,11 @@ public class QueryParser implements QueryParserConstants {
} }
if (t == null) if (t == null)
break; break;
v.addElement(t.termText()); v.addElement(t);
if (t.getPositionIncrement() != 0)
positionCount += t.getPositionIncrement();
else
severalTokensAtSamePosition = true;
} }
try { try {
source.close(); source.close();
@@ -311,37 +330,53 @@ public class QueryParser implements QueryParserConstants {
if (v.size() == 0) if (v.size() == 0)
return null; return null;
else if (v.size() == 1) else if (v.size() == 1) {
return new TermQuery(new Term(field, (String) v.elementAt(0))); t = (org.apache.lucene.analysis.Token) v.elementAt(0);
return new TermQuery(new Term(field, t.termText()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery(
new Term(field, t.termText()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
// phrase query:
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, t.termText()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
return mpq;
}
}
else { else {
PhraseQuery q = new PhraseQuery(); PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop); q.setSlop(phraseSlop);
for (int i = 0; i < v.size(); i++) { for (int i = 0; i < v.size(); i++) {
q.add(new Term(field, (String) v.elementAt(i))); q.add(new Term(field, ((org.apache.lucene.analysis.Token)
v.elementAt(i)).termText()));
} }
return q; return q;
} }
} }
/**
* Base implementation delegates to {@link #getFieldQuery(String, Analyzer, String)}.
* This method may be overwritten, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String, int)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText,
int slop) throws ParseException {
return getFieldQuery(field, queryText, slop);
} }
/** /**
* Base implementation delegates to {@link #getFieldQuery(String,String)}. * Base implementation delegates to {@link #getFieldQuery(String,String)}.
* This method may be overridden, for example, to return * This method may be overridden, for example, to return
@@ -356,25 +391,13 @@ public class QueryParser implements QueryParserConstants {
if (query instanceof PhraseQuery) { if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop); ((PhraseQuery) query).setSlop(slop);
} }
if (query instanceof MultiPhraseQuery) {
((MultiPhraseQuery) query).setSlop(slop);
}
return query; return query;
} }
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getRangeQuery(String, String, String, boolean)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRangeQuery(String field,
Analyzer analyzer,
String part1,
String part2,
boolean inclusive) throws ParseException {
return getRangeQuery(field, part1, part2, inclusive);
}
/** /**
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
@@ -384,11 +407,27 @@ public class QueryParser implements QueryParserConstants {
String part2, String part2,
boolean inclusive) throws ParseException boolean inclusive) throws ParseException
{ {
if (lowercaseExpandedTerms) {
part1 = part1.toLowerCase();
part2 = part2.toLowerCase();
}
try { try {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
df.setLenient(true); df.setLenient(true);
Date d1 = df.parse(part1); Date d1 = df.parse(part1);
Date d2 = df.parse(part2); Date d2 = df.parse(part2);
if (inclusive) {
// The user can only specify the date, not the time, so make sure
// the time is set to the latest possible time of that date to really
// include all documents:
Calendar cal = Calendar.getInstance(locale);
cal.setTime(d2);
cal.set(Calendar.HOUR_OF_DAY, 23);
cal.set(Calendar.MINUTE, 59);
cal.set(Calendar.SECOND, 59);
cal.set(Calendar.MILLISECOND, 999);
d2 = cal.getTime();
}
part1 = DateField.dateToString(d1); part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2); part2 = DateField.dateToString(d2);
} }
@@ -412,9 +451,28 @@ public class QueryParser implements QueryParserConstants {
* @return Resulting {@link Query} object. * @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
*/ */
protected Query getBooleanQuery(Vector clauses) throws ParseException protected Query getBooleanQuery(Vector clauses) throws ParseException {
return getBooleanQuery(clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses Vector that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{ {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) { for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i)); query.add((BooleanClause)clauses.elementAt(i));
} }
@@ -444,7 +502,7 @@ public class QueryParser implements QueryParserConstants {
*/ */
protected Query getWildcardQuery(String field, String termStr) throws ParseException protected Query getWildcardQuery(String field, String termStr) throws ParseException
{ {
if (lowercaseWildcardTerms) { if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(); termStr = termStr.toLowerCase();
} }
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
@@ -453,7 +511,7 @@ public class QueryParser implements QueryParserConstants {
/** /**
* Factory method for generating a query (similar to * Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses an input term * {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard * token that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case * character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily, * of generic wildcard term, and such a query can be optimized easily,
@@ -476,16 +534,17 @@ public class QueryParser implements QueryParserConstants {
*/ */
protected Query getPrefixQuery(String field, String termStr) throws ParseException protected Query getPrefixQuery(String field, String termStr) throws ParseException
{ {
if (lowercaseWildcardTerms) { if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(); termStr = termStr.toLowerCase();
} }
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
return new PrefixQuery(t); return new PrefixQuery(t);
} }
/** /**
* Factory method for generating a query (similar to * Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses * {@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~) appended. * an input term token that has the fuzzy suffix (~) appended.
* *
* @param field Name of the field query will use. * @param field Name of the field query will use.
@@ -494,26 +553,13 @@ public class QueryParser implements QueryParserConstants {
* @return Resulting {@link Query} built for the term * @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
*/ */
protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
return getFuzzyQuery(field, termStr, fuzzyMinSim);
}
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~floatNumber) appended.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
* @param minSimilarity the minimum similarity required for a fuzzy match
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{ {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
return new FuzzyQuery(t, minSimilarity); return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
} }
/** /**
@@ -534,10 +580,10 @@ public class QueryParser implements QueryParserConstants {
/** /**
* Returns a String where those characters that QueryParser * Returns a String where those characters that QueryParser
* expects to be escaped are escaped, i.e. preceded by a <code>\</code>. * expects to be escaped are escaped by a preceding <code>\</code>.
*/ */
public static String escape(String s) { public static String escape(String s) {
StringBuilder sb = new StringBuilder(s.length()); StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) { for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i); char c = s.charAt(i);
// NOTE: keep this in sync with _ESCAPED_CHAR below! // NOTE: keep this in sync with _ESCAPED_CHAR below!
@@ -551,7 +597,16 @@ public class QueryParser implements QueryParserConstants {
return sb.toString(); return sb.toString();
} }
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser("field", QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer()); new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]); Query q = qp.parse(args[0]);
@@ -789,12 +844,9 @@ public class QueryParser implements QueryParserConstants {
if(fms < 0.0f || fms > 1.0f){ if(fms < 0.0f || fms > 1.0f){
{if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");} {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
} }
if(fms == fuzzyMinSim)
q = getFuzzyQuery(field, termImage);
else
q = getFuzzyQuery(field, termImage,fms); q = getFuzzyQuery(field, termImage,fms);
} else { } else {
q = getFieldQuery(field, analyzer, termImage); q = getFieldQuery(field, termImage);
} }
break; break;
case RANGEIN_START: case RANGEIN_START:
@@ -851,7 +903,7 @@ public class QueryParser implements QueryParserConstants {
} else { } else {
goop2.image = discardEscapeChar(goop2.image); goop2.image = discardEscapeChar(goop2.image);
} }
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); q = getRangeQuery(field, goop1.image, goop2.image, true);
break; break;
case RANGEEX_START: case RANGEEX_START:
jj_consume_token(RANGEEX_START); jj_consume_token(RANGEEX_START);
@@ -908,7 +960,7 @@ public class QueryParser implements QueryParserConstants {
goop2.image = discardEscapeChar(goop2.image); goop2.image = discardEscapeChar(goop2.image);
} }
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); q = getRangeQuery(field, goop1.image, goop2.image, false);
break; break;
case QUOTED: case QUOTED:
term = jj_consume_token(QUOTED); term = jj_consume_token(QUOTED);
@@ -937,7 +989,7 @@ public class QueryParser implements QueryParserConstants {
} }
catch (Exception ignored) { } catch (Exception ignored) { }
} }
q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s); q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
break; break;
default: default:
jj_la1[21] = jj_gen; jj_la1[21] = jj_gen;
@@ -1183,6 +1235,7 @@ public class QueryParser implements QueryParserConstants {
final private void jj_rescan_token() { final private void jj_rescan_token() {
jj_rescan = true; jj_rescan = true;
for (int i = 0; i < 1; i++) { for (int i = 0; i < 1; i++) {
try {
JJCalls p = jj_2_rtns[i]; JJCalls p = jj_2_rtns[i];
do { do {
if (p.gen > jj_gen) { if (p.gen > jj_gen) {
@@ -1193,6 +1246,7 @@ public class QueryParser implements QueryParserConstants {
} }
p = p.next; p = p.next;
} while (p != null); } while (p != null);
} catch(LookaheadSuccess ls) { }
} }
jj_rescan = false; jj_rescan = false;
} }

View File

@@ -32,10 +32,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*; import org.apache.lucene.document.*;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
/** /**
* This class is generated by JavaCC. The only method that clients should need * This class is generated by JavaCC. The most important method is
* to call is <a href="#parse">parse()</a>. * {@link #parse(String)}.
* *
* The syntax for query strings is as follows: * The syntax for query strings is as follows:
* A Query is a series of clauses. * A Query is a series of clauses.
@@ -63,9 +64,21 @@ import org.apache.lucene.search.*;
* *
* <p> * <p>
* Examples of appropriately formatted queries can be found in the <a * Examples of appropriately formatted queries can be found in the <a
* href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>. * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax
* documentation</a>.
* </p> * </p>
* *
* <p>In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. <tt>date:[6/1/2005 TO 6/4/2005]</tt>
* produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note
* that the format of the accepted input depends on {@link #setLocale(Locale) the locale}. This
* feature also assumes that your index uses the {@link DateField} class to store dates.
* If you use a different format (e.g. {@link DateTools}) and you still want QueryParser
* to turn local dates in range queries into valid queries you need to create your own
* query parser that extends QueryParser and overrides
* {@link #getRangeQuery(String, String, String, boolean)}.</p>
*
* <p>Note that QueryParser is <em>not</em> thread-safe.</p>
*
* @author Brian Goetz * @author Brian Goetz
* @author Peter Halacsy * @author Peter Halacsy
* @author Tatu Saloranta * @author Tatu Saloranta
@@ -81,35 +94,36 @@ public class QueryParser {
private static final int MOD_NOT = 10; private static final int MOD_NOT = 10;
private static final int MOD_REQ = 11; private static final int MOD_REQ = 11;
public static final int DEFAULT_OPERATOR_OR = 0; // make it possible to call setDefaultOperator() without accessing
public static final int DEFAULT_OPERATOR_AND = 1; // the nested class:
/** Alternative form of QueryParser.Operator.AND */
public static final Operator AND_OPERATOR = Operator.AND;
/** Alternative form of QueryParser.Operator.OR */
public static final Operator OR_OPERATOR = Operator.OR;
/** The actual operator that parser uses to combine query terms */ /** The actual operator that parser uses to combine query terms */
private int operator = DEFAULT_OPERATOR_OR; private Operator operator = OR_OPERATOR;
/** boolean lowercaseExpandedTerms = true;
* Whether terms of wildcard and prefix queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
boolean lowercaseWildcardTerms = true;
Analyzer analyzer; Analyzer analyzer;
String field; String field;
int phraseSlop = 0; int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
Locale locale = Locale.getDefault(); Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}. /** The default operator for parsing queries.
* @param query the query string to be parsed. * Use {@link QueryParser#setDefaultOperator} to change it.
* @param field the default field for query terms.
* @param analyzer used to find terms in the query text.
* @throws ParseException if the parsing fails
*/ */
static public Query parse(String query, String field, Analyzer analyzer) static public final class Operator extends Parameter {
throws ParseException { private Operator(String name) {
QueryParser parser = new QueryParser(field, analyzer); super(name);
return parser.parse(query);
} }
static public final Operator OR = new Operator("OR");
static public final Operator AND = new Operator("AND");
}
/** Constructs a query parser. /** Constructs a query parser.
* @param f the default field for query terms. * @param f the default field for query terms.
@@ -121,8 +135,7 @@ public class QueryParser {
field = f; field = f;
} }
/** Parses a query string, returning a /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* <a href="lucene.search.Query.html">Query</a>.
* @param query the query string to be parsed. * @param query the query string to be parsed.
* @throws ParseException if the parsing fails * @throws ParseException if the parsing fails
*/ */
@@ -154,18 +167,36 @@ public class QueryParser {
} }
/** /**
* Get the default minimal similarity for fuzzy queries. * Get the minimal similarity for fuzzy queries.
*/ */
public float getFuzzyMinSim() { public float getFuzzyMinSim() {
return fuzzyMinSim; return fuzzyMinSim;
} }
/** /**
*Set the default minimum similarity for fuzzy queries. * Set the minimum similarity for fuzzy queries.
* Default is 0.5f.
*/ */
public void setFuzzyMinSim(float fuzzyMinSim) { public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim; this.fuzzyMinSim = fuzzyMinSim;
} }
/**
* Get the prefix length for fuzzy queries.
* @return Returns the fuzzyPrefixLength.
*/
public int getFuzzyPrefixLength() {
return fuzzyPrefixLength;
}
/**
* Set the prefix length for fuzzy queries. Default is 0.
* @param fuzzyPrefixLength The fuzzyPrefixLength to set.
*/
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
/** /**
* Sets the default slop for phrases. If zero, then exact phrase matches * Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero. * are required. Default value is zero.
@@ -181,32 +212,43 @@ public class QueryParser {
return phraseSlop; return phraseSlop;
} }
/**
* Sets the boolean operator of the QueryParser.
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
public void setOperator(int operator) {
this.operator = operator;
}
/** /**
* Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND * Sets the boolean operator of the QueryParser.
* or DEFAULT_OPERATOR_OR. * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/ */
public int getOperator() { public void setDefaultOperator(Operator op) {
this.operator = op;
}
/**
* Gets implicit operator setting, which will be either AND_OPERATOR
* or OR_OPERATOR.
*/
public Operator getDefaultOperator() {
return operator; return operator;
} }
public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
this.lowercaseWildcardTerms = lowercaseWildcardTerms; /**
* Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
} }
public boolean getLowercaseWildcardTerms() {
return lowercaseWildcardTerms; /**
* @see #setLowercaseExpandedTerms(boolean)
*/
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
} }
/** /**
@@ -230,18 +272,18 @@ public class QueryParser {
// unless it's already prohibited // unless it's already prohibited
if (clauses.size() > 0 && conj == CONJ_AND) { if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.prohibited) if (!c.isProhibited())
c.required = true; c.setOccur(BooleanClause.Occur.MUST);
} }
if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional, // If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without // notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would be parsed as +a OR b // this modification a OR b would be parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.prohibited) if (!c.isProhibited())
c.required = false; c.setOccur(BooleanClause.Occur.SHOULD);
} }
// We might have been passed a null query; the term might have been // We might have been passed a null query; the term might have been
@@ -249,7 +291,7 @@ public class QueryParser {
if (q == null) if (q == null)
return; return;
if (operator == DEFAULT_OPERATOR_OR) { if (operator == OR_OPERATOR) {
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both. // introduced by NOT or -; make sure not to set both.
prohibited = (mods == MOD_NOT); prohibited = (mods == MOD_NOT);
@@ -263,22 +305,16 @@ public class QueryParser {
prohibited = (mods == MOD_NOT); prohibited = (mods == MOD_NOT);
required = (!prohibited && conj != CONJ_OR); required = (!prohibited && conj != CONJ_OR);
} }
clauses.addElement(new BooleanClause(q, required, prohibited)); if (required && !prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
else if (!required && !prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
else if (!required && prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
else
throw new RuntimeException("Clause cannot be both required and prohibited");
} }
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText) throws ParseException {
return getFieldQuery(field, queryText);
}
/** /**
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
@@ -287,10 +323,11 @@ public class QueryParser {
// Use the analyzer to get all the tokens, and then build a TermQuery, // Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count // PhraseQuery, or nothing based on the term count
TokenStream source = analyzer.tokenStream(field, TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
new StringReader(queryText));
Vector v = new Vector(); Vector v = new Vector();
org.apache.lucene.analysis.Token t; org.apache.lucene.analysis.Token t;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
while (true) { while (true) {
try { try {
@@ -301,7 +338,11 @@ public class QueryParser {
} }
if (t == null) if (t == null)
break; break;
v.addElement(t.termText()); v.addElement(t);
if (t.getPositionIncrement() != 0)
positionCount += t.getPositionIncrement();
else
severalTokensAtSamePosition = true;
} }
try { try {
source.close(); source.close();
@@ -312,37 +353,53 @@ public class QueryParser {
if (v.size() == 0) if (v.size() == 0)
return null; return null;
else if (v.size() == 1) else if (v.size() == 1) {
return new TermQuery(new Term(field, (String) v.elementAt(0))); t = (org.apache.lucene.analysis.Token) v.elementAt(0);
return new TermQuery(new Term(field, t.termText()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery(
new Term(field, t.termText()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
// phrase query:
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, t.termText()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
return mpq;
}
}
else { else {
PhraseQuery q = new PhraseQuery(); PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop); q.setSlop(phraseSlop);
for (int i = 0; i < v.size(); i++) { for (int i = 0; i < v.size(); i++) {
q.add(new Term(field, (String) v.elementAt(i))); q.add(new Term(field, ((org.apache.lucene.analysis.Token)
v.elementAt(i)).termText()));
} }
return q; return q;
} }
} }
/**
* Base implementation delegates to {@link #getFieldQuery(String, Analyzer, String)}.
* This method may be overwritten, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String, int)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText,
int slop) throws ParseException {
return getFieldQuery(field, queryText, slop);
} }
/** /**
* Base implementation delegates to {@link #getFieldQuery(String,String)}. * Base implementation delegates to {@link #getFieldQuery(String,String)}.
* This method may be overridden, for example, to return * This method may be overridden, for example, to return
@@ -357,25 +414,13 @@ public class QueryParser {
if (query instanceof PhraseQuery) { if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop); ((PhraseQuery) query).setSlop(slop);
} }
if (query instanceof MultiPhraseQuery) {
((MultiPhraseQuery) query).setSlop(slop);
}
return query; return query;
} }
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getRangeQuery(String, String, String, boolean)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRangeQuery(String field,
Analyzer analyzer,
String part1,
String part2,
boolean inclusive) throws ParseException {
return getRangeQuery(field, part1, part2, inclusive);
}
/** /**
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
@@ -385,11 +430,27 @@ public class QueryParser {
String part2, String part2,
boolean inclusive) throws ParseException boolean inclusive) throws ParseException
{ {
if (lowercaseExpandedTerms) {
part1 = part1.toLowerCase();
part2 = part2.toLowerCase();
}
try { try {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
df.setLenient(true); df.setLenient(true);
Date d1 = df.parse(part1); Date d1 = df.parse(part1);
Date d2 = df.parse(part2); Date d2 = df.parse(part2);
if (inclusive) {
// The user can only specify the date, not the time, so make sure
// the time is set to the latest possible time of that date to really
// include all documents:
Calendar cal = Calendar.getInstance(locale);
cal.setTime(d2);
cal.set(Calendar.HOUR_OF_DAY, 23);
cal.set(Calendar.MINUTE, 59);
cal.set(Calendar.SECOND, 59);
cal.set(Calendar.MILLISECOND, 999);
d2 = cal.getTime();
}
part1 = DateField.dateToString(d1); part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2); part2 = DateField.dateToString(d2);
} }
@@ -413,10 +474,28 @@ public class QueryParser {
* @return Resulting {@link Query} object. * @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
*/ */
protected Query getBooleanQuery(Vector clauses) throws ParseException protected Query getBooleanQuery(Vector clauses) throws ParseException {
return getBooleanQuery(clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses Vector that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{ {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(disableCoord);
query.
for (int i = 0; i < clauses.size(); i++) { for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i)); query.add((BooleanClause)clauses.elementAt(i));
} }
@@ -446,7 +525,7 @@ public class QueryParser {
*/ */
protected Query getWildcardQuery(String field, String termStr) throws ParseException protected Query getWildcardQuery(String field, String termStr) throws ParseException
{ {
if (lowercaseWildcardTerms) { if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(); termStr = termStr.toLowerCase();
} }
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
@@ -455,7 +534,7 @@ public class QueryParser {
/** /**
* Factory method for generating a query (similar to * Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses an input term * {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard * token that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case * character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily, * of generic wildcard term, and such a query can be optimized easily,
@@ -478,16 +557,17 @@ public class QueryParser {
*/ */
protected Query getPrefixQuery(String field, String termStr) throws ParseException protected Query getPrefixQuery(String field, String termStr) throws ParseException
{ {
if (lowercaseWildcardTerms) { if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(); termStr = termStr.toLowerCase();
} }
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
return new PrefixQuery(t); return new PrefixQuery(t);
} }
/** /**
* Factory method for generating a query (similar to * Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses * {@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~) appended. * an input term token that has the fuzzy suffix (~) appended.
* *
* @param field Name of the field query will use. * @param field Name of the field query will use.
@@ -496,26 +576,13 @@ public class QueryParser {
* @return Resulting {@link Query} built for the term * @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
*/ */
protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
return getFuzzyQuery(field, termStr, fuzzyMinSim);
}
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~floatNumber) appended.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
* @param minSimilarity the minimum similarity required for a fuzzy match
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{ {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
return new FuzzyQuery(t, minSimilarity); return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
} }
/** /**
@@ -536,7 +603,7 @@ public class QueryParser {
/** /**
* Returns a String where those characters that QueryParser * Returns a String where those characters that QueryParser
* expects to be escaped are escaped, i.e. preceded by a <code>\</code>. * expects to be escaped are escaped by a preceding <code>\</code>.
*/ */
public static String escape(String s) { public static String escape(String s) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
@@ -553,7 +620,16 @@ public class QueryParser {
return sb.toString(); return sb.toString();
} }
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser("field", QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer()); new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]); Query q = qp.parse(args[0]);
@@ -584,13 +660,14 @@ PARSER_END(QueryParser)
} }
// OG: to support prefix queries: // OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137 // http://issues.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from: // Change from:
//
// | <WILDTERM: <_TERM_START_CHAR> // | <WILDTERM: <_TERM_START_CHAR>
// (<_TERM_CHAR> | ( [ "*", "?" ] ))* > // (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To: // To:
// //
// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" ] ))* > // (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
<DEFAULT> TOKEN : { <DEFAULT> TOKEN : {
<AND: ("AND" | "&&") > <AND: ("AND" | "&&") >
@@ -606,8 +683,7 @@ PARSER_END(QueryParser)
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" > | <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: (<_TERM_START_CHAR> | ( [ "*", "?" ] )) | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn | <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx | <RANGEEX_START: "{" > : RangeEx
} }
@@ -746,12 +822,9 @@ Query Term(String field) : {
if(fms < 0.0f || fms > 1.0f){ if(fms < 0.0f || fms > 1.0f){
throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
} }
if(fms == fuzzyMinSim)
q = getFuzzyQuery(field, termImage);
else
q = getFuzzyQuery(field, termImage,fms); q = getFuzzyQuery(field, termImage,fms);
} else { } else {
q = getFieldQuery(field, analyzer, termImage); q = getFieldQuery(field, termImage);
} }
} }
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
@@ -769,7 +842,7 @@ Query Term(String field) : {
} else { } else {
goop2.image = discardEscapeChar(goop2.image); goop2.image = discardEscapeChar(goop2.image);
} }
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); q = getRangeQuery(field, goop1.image, goop2.image, true);
} }
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
[ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
@@ -787,7 +860,7 @@ Query Term(String field) : {
goop2.image = discardEscapeChar(goop2.image); goop2.image = discardEscapeChar(goop2.image);
} }
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); q = getRangeQuery(field, goop1.image, goop2.image, false);
} }
| term=<QUOTED> | term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ] [ fuzzySlop=<FUZZY_SLOP> ]
@@ -801,7 +874,7 @@ Query Term(String field) : {
} }
catch (Exception ignored) { } catch (Exception ignored) { }
} }
q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s); q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
} }
) )
{ {

View File

@@ -1,5 +1,14 @@
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */ /* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.alfresco.repo.search.impl.lucene; package org.alfresco.repo.search.impl.lucene;
import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
public class QueryParserTokenManager implements QueryParserConstants public class QueryParserTokenManager implements QueryParserConstants
{ {
@@ -937,12 +946,10 @@ protected CharStream input_stream;
private final int[] jjrounds = new int[34]; private final int[] jjrounds = new int[34];
private final int[] jjstateSet = new int[68]; private final int[] jjstateSet = new int[68];
protected char curChar; protected char curChar;
public QueryParserTokenManager(CharStream stream) public QueryParserTokenManager(CharStream stream){
{
input_stream = stream; input_stream = stream;
} }
public QueryParserTokenManager(CharStream stream, int lexState) public QueryParserTokenManager(CharStream stream, int lexState){
{
this(stream); this(stream);
SwitchTo(lexState); SwitchTo(lexState);
} }

View File

@@ -1,21 +1,4 @@
/* /* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */package org.alfresco.repo.search.impl.lucene;
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
package org.alfresco.repo.search.impl.lucene;
/** /**
* Describes the input token stream. * Describes the input token stream.

View File

@@ -38,7 +38,7 @@ public class TokenMgrError extends Error
* equivalents in the given string * equivalents in the given string
*/ */
protected static final String addEscapes(String str) { protected static final String addEscapes(String str) {
StringBuilder retval = new StringBuilder(str.length() + 8); StringBuffer retval = new StringBuffer();
char ch; char ch;
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
switch (str.charAt(i)) switch (str.charAt(i))