Updated Query parser

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4519 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2006-12-05 11:20:22 +00:00
parent e8b37eb880
commit fd351b9411
11 changed files with 1235 additions and 864 deletions

View File

@@ -25,20 +25,6 @@ public interface CharStream {
*/
char readChar() throws java.io.IOException;
/**
* Returns the column position of the character last read.
* @deprecated
* @see #getEndColumn
*/
int getColumn();
/**
* Returns the line number of the character last read.
* @deprecated
* @see #getEndLine
*/
int getLine();
/**
* Returns the column number of the last character for current token (being
* matched after the last call to BeginTOken).

View File

@@ -17,8 +17,7 @@ package org.alfresco.repo.search.impl.lucene;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the

View File

@@ -63,7 +63,7 @@ public class LuceneQueryParser extends QueryParser
* if the parsing fails
*/
static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, int defaultOperator)
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, Operator defaultOperator)
throws ParseException
{
if (s_logger.isDebugEnabled())
@@ -71,7 +71,7 @@ public class LuceneQueryParser extends QueryParser
s_logger.debug("Using Alfresco Lucene Query Parser for query: " + query);
}
LuceneQueryParser parser = new LuceneQueryParser(field, analyzer);
parser.setOperator(defaultOperator);
parser.setDefaultOperator(defaultOperator);
parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService);
return parser.parse(query);

View File

@@ -32,6 +32,7 @@ import org.alfresco.repo.search.Indexer;
import org.alfresco.repo.search.QueryRegisterComponent;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.NodeSearcher;
import org.alfresco.repo.search.impl.lucene.QueryParser.Operator;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.InvalidNodeRefException;
import org.alfresco.service.cmr.repository.NodeRef;
@@ -203,14 +204,14 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
try
{
int defaultOperator;
Operator defaultOperator;
if (searchParameters.getDefaultOperator() == SearchParameters.AND)
{
defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_AND;
defaultOperator = LuceneQueryParser.AND_OPERATOR;
}
else
{
defaultOperator = LuceneQueryParser.DEFAULT_OPERATOR_OR;
defaultOperator = LuceneQueryParser.OR_OPERATOR;
}
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(

View File

@@ -0,0 +1,268 @@
package org.alfresco.repo.search.impl.lucene;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import java.util.Vector;
/**
* A QueryParser which constructs queries to search multiple fields.
*
* @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>, Daniel Naber
* @version $Revision: 406088 $
*/
public class MultiFieldQueryParser extends QueryParser
{
private String[] fields;
/**
* Creates a MultiFieldQueryParser.
*
* <p>It will, when parse(String query)
* is called, construct a query like this (assuming the query consists of
* two terms and you specify the two fields <code>title</code> and <code>body</code>):</p>
*
* <code>
* (title:term1 body:term1) (title:term2 body:term2)
* </code>
*
* <p>When setDefaultOperator(AND_OPERATOR) is set, the result will be:</p>
*
* <code>
* +(title:term1 body:term1) +(title:term2 body:term2)
* </code>
*
* <p>In other words, all the query's terms must appear, but it doesn't matter in
* what fields they appear.</p>
*/
public MultiFieldQueryParser(String[] fields, Analyzer analyzer) {
super(null, analyzer);
this.fields = fields;
}
protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
if (field == null) {
Vector clauses = new Vector();
for (int i = 0; i < fields.length; i++) {
Query q = super.getFieldQuery(fields[i], queryText);
if (q != null) {
if (q instanceof PhraseQuery) {
((PhraseQuery) q).setSlop(slop);
}
if (q instanceof MultiPhraseQuery) {
((MultiPhraseQuery) q).setSlop(slop);
}
clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
}
}
if (clauses.size() == 0) // happens for stopwords
return null;
return getBooleanQuery(clauses, true);
}
return super.getFieldQuery(field, queryText);
}
protected Query getFieldQuery(String field, String queryText) throws ParseException {
return getFieldQuery(field, queryText, 0);
}
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
if (field == null) {
Vector clauses = new Vector();
for (int i = 0; i < fields.length; i++) {
clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity),
BooleanClause.Occur.SHOULD));
}
return getBooleanQuery(clauses, true);
}
return super.getFuzzyQuery(field, termStr, minSimilarity);
}
protected Query getPrefixQuery(String field, String termStr) throws ParseException
{
if (field == null) {
Vector clauses = new Vector();
for (int i = 0; i < fields.length; i++) {
clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr),
BooleanClause.Occur.SHOULD));
}
return getBooleanQuery(clauses, true);
}
return super.getPrefixQuery(field, termStr);
}
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
if (field == null) {
Vector clauses = new Vector();
for (int i = 0; i < fields.length; i++) {
clauses.add(new BooleanClause(super.getWildcardQuery(fields[i], termStr),
BooleanClause.Occur.SHOULD));
}
return getBooleanQuery(clauses, true);
}
return super.getWildcardQuery(field, termStr);
}
protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException {
if (field == null) {
Vector clauses = new Vector();
for (int i = 0; i < fields.length; i++) {
clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive),
BooleanClause.Occur.SHOULD));
}
return getBooleanQuery(clauses, true);
}
return super.getRangeQuery(field, part1, part2, inclusive);
}
/**
* Parses a query which searches on the fields specified.
* <p>
* If x fields are specified, this effectively constructs:
* <pre>
* <code>
* (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
* </code>
* </pre>
* @param queries Queries strings to parse
* @param fields Fields to search on
* @param analyzer Analyzer to use
* @throws ParseException if query parsing fails
* @throws IllegalArgumentException if the length of the queries array differs
* from the length of the fields array
*/
public static Query parse(String[] queries, String[] fields,
Analyzer analyzer) throws ParseException
{
if (queries.length != fields.length)
throw new IllegalArgumentException("queries.length != fields.length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++)
{
QueryParser qp = new QueryParser(fields[i], analyzer);
Query q = qp.parse(queries[i]);
bQuery.add(q, BooleanClause.Occur.SHOULD);
}
return bQuery;
}
/**
* Parses a query, searching on the fields specified.
* Use this if you need to specify certain fields as required,
* and others as prohibited.
* <p><pre>
* Usage:
* <code>
* String[] fields = {"filename", "contents", "description"};
* BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
* BooleanClause.Occur.MUST,
* BooleanClause.Occur.MUST_NOT};
* MultiFieldQueryParser.parse("query", fields, flags, analyzer);
* </code>
* </pre>
*<p>
* The code above would construct a query:
* <pre>
* <code>
* (filename:query) +(contents:query) -(description:query)
* </code>
* </pre>
*
* @param query Query string to parse
* @param fields Fields to search on
* @param flags Flags describing the fields
* @param analyzer Analyzer to use
* @throws ParseException if query parsing fails
* @throws IllegalArgumentException if the length of the fields array differs
* from the length of the flags array
*/
public static Query parse(String query, String[] fields,
BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
if (fields.length != flags.length)
throw new IllegalArgumentException("fields.length != flags.length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++) {
QueryParser qp = new QueryParser(fields[i], analyzer);
Query q = qp.parse(query);
bQuery.add(q, flags[i]);
}
return bQuery;
}
/**
* Parses a query, searching on the fields specified.
* Use this if you need to specify certain fields as required,
* and others as prohibited.
* <p><pre>
* Usage:
* <code>
* String[] query = {"query1", "query2", "query3"};
* String[] fields = {"filename", "contents", "description"};
* BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
* BooleanClause.Occur.MUST,
* BooleanClause.Occur.MUST_NOT};
* MultiFieldQueryParser.parse(query, fields, flags, analyzer);
* </code>
* </pre>
*<p>
* The code above would construct a query:
* <pre>
* <code>
* (filename:query1) +(contents:query2) -(description:query3)
* </code>
* </pre>
*
* @param queries Queries string to parse
* @param fields Fields to search on
* @param flags Flags describing the fields
* @param analyzer Analyzer to use
* @throws ParseException if query parsing fails
* @throws IllegalArgumentException if the length of the queries, fields,
* and flags array differ
*/
public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
Analyzer analyzer) throws ParseException
{
if (!(queries.length == fields.length && queries.length == flags.length))
throw new IllegalArgumentException("queries, fields, and flags array have have different length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++)
{
QueryParser qp = new QueryParser(fields[i], analyzer);
Query q = qp.parse(queries[i]);
bQuery.add(q, flags[i]);
}
return bQuery;
}
}

View File

@@ -1,31 +1,19 @@
/* Generated By:JavaCC: Do not edit this line. QueryParser.java */
package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateField;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
/**
* This class is generated by JavaCC. The only method that clients should need
* to call is <a href="#parse">parse()</a>.
* This class is generated by JavaCC. The most important method is
* {@link #parse(String)}.
*
* The syntax for query strings is as follows:
* A Query is a series of clauses.
@@ -53,9 +41,21 @@ import org.apache.lucene.search.BooleanClause.Occur;
*
* <p>
* Examples of appropriately formatted queries can be found in the <a
* href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>.
* href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax
* documentation</a>.
* </p>
*
* <p>In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. <tt>date:[6/1/2005 TO 6/4/2005]</tt>
* produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note
* that the format of the accpeted input depends on {@link #setLocale(Locale) the locale}. This
* feature also assumes that your index uses the {@link DateField} class to store dates.
* If you use a different format (e.g. {@link DateTools}) and you still want QueryParser
* to turn local dates in range queries into valid queries you need to create your own
* query parser that inherits QueryParser and overwrites
* {@link #getRangeQuery(String, String, String, boolean)}.</p>
*
* <p>Note that QueryParser is <em>not</em> thread-safe.</p>
*
* @author Brian Goetz
* @author Peter Halacsy
* @author Tatu Saloranta
@@ -71,36 +71,37 @@ public class QueryParser implements QueryParserConstants {
private static final int MOD_NOT = 10;
private static final int MOD_REQ = 11;
public static final int DEFAULT_OPERATOR_OR = 0;
public static final int DEFAULT_OPERATOR_AND = 1;
// make it possible to call setDefaultOperator() without accessing
// the nested class:
/** Alternative form of QueryParser.Operator.AND */
public static final Operator AND_OPERATOR = Operator.AND;
/** Alternative form of QueryParser.Operator.OR */
public static final Operator OR_OPERATOR = Operator.OR;
/** The actual operator that parser uses to combine query terms */
private int operator = DEFAULT_OPERATOR_OR;
private Operator operator = OR_OPERATOR;
/**
* Whether terms of wildcard and prefix queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
boolean lowercaseWildcardTerms = true;
boolean lowercaseExpandedTerms = true;
Analyzer analyzer;
String field;
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
* @param field the default field for query terms.
* @param analyzer used to find terms in the query text.
* @throws ParseException if the parsing fails
/** The default operator for parsing queries.
* Use {@link QueryParser#setDefaultOperator} to change it.
*/
static public Query parse(String query, String field, Analyzer analyzer)
throws ParseException {
QueryParser parser = new QueryParser(field, analyzer);
return parser.parse(query);
static public final class Operator extends Parameter {
private Operator(String name) {
super(name);
}
static public final Operator OR = new Operator("OR");
static public final Operator AND = new Operator("AND");
}
/** Constructs a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -111,8 +112,7 @@ public class QueryParser implements QueryParserConstants {
field = f;
}
/** Parses a query string, returning a
* <a href="lucene.search.Query.html">Query</a>.
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
@@ -144,18 +144,36 @@ public class QueryParser implements QueryParserConstants {
}
/**
* Get the default minimal similarity for fuzzy queries.
* Get the minimal similarity for fuzzy queries.
*/
public float getFuzzyMinSim() {
return fuzzyMinSim;
}
/**
*Set the default minimum similarity for fuzzy queries.
* Set the minimum similarity for fuzzy queries.
* Default is 0.5f.
*/
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
/**
* Get the prefix length for fuzzy queries.
* @return Returns the fuzzyPrefixLength.
*/
public int getFuzzyPrefixLength() {
return fuzzyPrefixLength;
}
/**
* Set the prefix length for fuzzy queries. Default is 0.
* @param fuzzyPrefixLength The fuzzyPrefixLength to set.
*/
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero.
@@ -171,32 +189,43 @@ public class QueryParser implements QueryParserConstants {
return phraseSlop;
}
/**
* Sets the boolean operator of the QueryParser.
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
public void setOperator(int operator) {
this.operator = operator;
}
/**
* Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
* or DEFAULT_OPERATOR_OR.
* Sets the boolean operator of the QueryParser.
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
public int getOperator() {
public void setDefaultOperator(Operator op) {
this.operator = op;
}
/**
* Gets implicit operator setting, which will be either AND_OPERATOR
* or OR_OPERATOR.
*/
public Operator getDefaultOperator() {
return operator;
}
public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
this.lowercaseWildcardTerms = lowercaseWildcardTerms;
/**
* Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}
public boolean getLowercaseWildcardTerms() {
return lowercaseWildcardTerms;
/**
* @see #setLowercaseExpandedTerms(boolean)
*/
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
}
/**
@@ -213,7 +242,7 @@ public class QueryParser implements QueryParserConstants {
return locale;
}
protected void addClause(Vector clauses, int conj, int mods, Query q) {
protected void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
@@ -221,17 +250,17 @@ public class QueryParser implements QueryParserConstants {
if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.isProhibited())
c.setOccur(Occur.MUST);
c.setOccur(BooleanClause.Occur.MUST);
}
if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.isProhibited())
c.setOccur(Occur.SHOULD);
c.setOccur(BooleanClause.Occur.SHOULD);
}
// We might have been passed a null query; the term might have been
@@ -239,7 +268,7 @@ public class QueryParser implements QueryParserConstants {
if (q == null)
return;
if (operator == DEFAULT_OPERATOR_OR) {
if (operator == OR_OPERATOR) {
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both.
prohibited = (mods == MOD_NOT);
@@ -253,31 +282,16 @@ public class QueryParser implements QueryParserConstants {
prohibited = (mods == MOD_NOT);
required = (!prohibited && conj != CONJ_OR);
}
Occur occur = Occur.SHOULD;
if(prohibited)
{
occur = Occur.MUST_NOT;
}
if(required)
{
occur = Occur.MUST;
}
clauses.addElement(new BooleanClause(q, occur));
if (required && !prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
else if (!required && !prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
else if (!required && prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
else
throw new RuntimeException("Clause cannot be both required and prohibited");
}
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText) throws ParseException {
return getFieldQuery(field, queryText);
}
/**
* @exception ParseException throw in overridden method to disallow
@@ -286,10 +300,11 @@ public class QueryParser implements QueryParserConstants {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
TokenStream source = analyzer.tokenStream(field,
new StringReader(queryText));
TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
Vector v = new Vector();
org.apache.lucene.analysis.Token t;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
while (true) {
try {
@@ -300,7 +315,11 @@ public class QueryParser implements QueryParserConstants {
}
if (t == null)
break;
v.addElement(t.termText());
v.addElement(t);
if (t.getPositionIncrement() != 0)
positionCount += t.getPositionIncrement();
else
severalTokensAtSamePosition = true;
}
try {
source.close();
@@ -311,36 +330,52 @@ public class QueryParser implements QueryParserConstants {
if (v.size() == 0)
return null;
else if (v.size() == 1)
return new TermQuery(new Term(field, (String) v.elementAt(0)));
else {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i=0; i<v.size(); i++) {
q.add(new Term(field, (String) v.elementAt(i)));
else if (v.size() == 1) {
t = (org.apache.lucene.analysis.Token) v.elementAt(0);
return new TermQuery(new Term(field, t.termText()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery(
new Term(field, t.termText()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
// phrase query:
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, t.termText()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
return mpq;
}
}
else {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < v.size(); i++) {
q.add(new Term(field, ((org.apache.lucene.analysis.Token)
v.elementAt(i)).termText()));
}
return q;
}
return q;
}
}
/**
* Base implementation delegates to {@link #getFieldQuery(String, Analyzer, String)}.
* This method may be overwritten, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String, int)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText,
int slop) throws ParseException {
return getFieldQuery(field, queryText, slop);
}
/**
* Base implementation delegates to {@link #getFieldQuery(String,String)}.
@@ -356,25 +391,13 @@ public class QueryParser implements QueryParserConstants {
if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop);
}
if (query instanceof MultiPhraseQuery) {
((MultiPhraseQuery) query).setSlop(slop);
}
return query;
}
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getRangeQuery(String, String, String, boolean)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRangeQuery(String field,
Analyzer analyzer,
String part1,
String part2,
boolean inclusive) throws ParseException {
return getRangeQuery(field, part1, part2, inclusive);
}
/**
* @exception ParseException throw in overridden method to disallow
@@ -384,11 +407,27 @@ public class QueryParser implements QueryParserConstants {
String part2,
boolean inclusive) throws ParseException
{
if (lowercaseExpandedTerms) {
part1 = part1.toLowerCase();
part2 = part2.toLowerCase();
}
try {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
df.setLenient(true);
Date d1 = df.parse(part1);
Date d2 = df.parse(part2);
if (inclusive) {
// The user can only specify the date, not the time, so make sure
// the time is set to the latest possible time of that date to really
// include all documents:
Calendar cal = Calendar.getInstance(locale);
cal.setTime(d2);
cal.set(Calendar.HOUR_OF_DAY, 23);
cal.set(Calendar.MINUTE, 59);
cal.set(Calendar.SECOND, 59);
cal.set(Calendar.MILLISECOND, 999);
d2 = cal.getTime();
}
part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2);
}
@@ -412,9 +451,28 @@ public class QueryParser implements QueryParserConstants {
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses) throws ParseException
protected Query getBooleanQuery(Vector clauses) throws ParseException {
return getBooleanQuery(clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses Vector that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{
BooleanQuery query = new BooleanQuery();
BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i));
}
@@ -444,8 +502,8 @@ public class QueryParser implements QueryParserConstants {
*/
protected Query getWildcardQuery(String field, String termStr) throws ParseException
{
if (lowercaseWildcardTerms) {
termStr = termStr.toLowerCase();
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new WildcardQuery(t);
@@ -453,7 +511,7 @@ public class QueryParser implements QueryParserConstants {
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses an input term
* {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily,
@@ -476,16 +534,17 @@ public class QueryParser implements QueryParserConstants {
*/
protected Query getPrefixQuery(String field, String termStr) throws ParseException
{
if (lowercaseWildcardTerms) {
termStr = termStr.toLowerCase();
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new PrefixQuery(t);
}
/**
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses
* {@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~) appended.
*
* @param field Name of the field query will use.
@@ -494,26 +553,13 @@ public class QueryParser implements QueryParserConstants {
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
return getFuzzyQuery(field, termStr, fuzzyMinSim);
}
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~floatNumber) appended.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
* @param minSimilarity the minimum similarity required for a fuzzy match
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new FuzzyQuery(t, minSimilarity);
return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
}
/**
@@ -534,10 +580,10 @@ public class QueryParser implements QueryParserConstants {
/**
* Returns a String where those characters that QueryParser
* expects to be escaped are escaped, i.e. preceded by a <code>\</code>.
* expects to be escaped are escaped by a preceding <code>\</code>.
*/
public static String escape(String s) {
StringBuilder sb = new StringBuilder(s.length());
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
// NOTE: keep this in sync with _ESCAPED_CHAR below!
@@ -551,7 +597,16 @@ public class QueryParser implements QueryParserConstants {
return sb.toString();
}
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
@@ -789,12 +844,9 @@ public class QueryParser implements QueryParserConstants {
if(fms < 0.0f || fms > 1.0f){
{if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
}
if(fms == fuzzyMinSim)
q = getFuzzyQuery(field, termImage);
else
q = getFuzzyQuery(field, termImage, fms);
q = getFuzzyQuery(field, termImage,fms);
} else {
q = getFieldQuery(field, analyzer, termImage);
q = getFieldQuery(field, termImage);
}
break;
case RANGEIN_START:
@@ -851,7 +903,7 @@ public class QueryParser implements QueryParserConstants {
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
q = getRangeQuery(field, goop1.image, goop2.image, true);
break;
case RANGEEX_START:
jj_consume_token(RANGEEX_START);
@@ -908,7 +960,7 @@ public class QueryParser implements QueryParserConstants {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
q = getRangeQuery(field, goop1.image, goop2.image, false);
break;
case QUOTED:
term = jj_consume_token(QUOTED);
@@ -937,7 +989,7 @@ public class QueryParser implements QueryParserConstants {
}
catch (Exception ignored) { }
}
q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
break;
default:
jj_la1[21] = jj_gen;
@@ -1183,6 +1235,7 @@ public class QueryParser implements QueryParserConstants {
final private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 1; i++) {
try {
JJCalls p = jj_2_rtns[i];
do {
if (p.gen > jj_gen) {
@@ -1193,6 +1246,7 @@ public class QueryParser implements QueryParserConstants {
}
p = p.next;
} while (p != null);
} catch(LookaheadSuccess ls) { }
}
jj_rescan = false;
}

View File

@@ -32,10 +32,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
/**
* This class is generated by JavaCC. The only method that clients should need
* to call is <a href="#parse">parse()</a>.
* This class is generated by JavaCC. The most important method is
* {@link #parse(String)}.
*
* The syntax for query strings is as follows:
* A Query is a series of clauses.
@@ -63,9 +64,21 @@ import org.apache.lucene.search.*;
*
* <p>
* Examples of appropriately formatted queries can be found in the <a
* href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>.
* href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax
* documentation</a>.
* </p>
*
* <p>In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. <tt>date:[6/1/2005 TO 6/4/2005]</tt>
* produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note
* that the format of the accpeted input depends on {@link #setLocale(Locale) the locale}. This
* feature also assumes that your index uses the {@link DateField} class to store dates.
* If you use a different format (e.g. {@link DateTools}) and you still want QueryParser
* to turn local dates in range queries into valid queries you need to create your own
* query parser that inherits QueryParser and overwrites
* {@link #getRangeQuery(String, String, String, boolean)}.</p>
*
* <p>Note that QueryParser is <em>not</em> thread-safe.</p>
*
* @author Brian Goetz
* @author Peter Halacsy
* @author Tatu Saloranta
@@ -81,36 +94,37 @@ public class QueryParser {
private static final int MOD_NOT = 10;
private static final int MOD_REQ = 11;
public static final int DEFAULT_OPERATOR_OR = 0;
public static final int DEFAULT_OPERATOR_AND = 1;
// make it possible to call setDefaultOperator() without accessing
// the nested class:
/** Alternative form of QueryParser.Operator.AND */
public static final Operator AND_OPERATOR = Operator.AND;
/** Alternative form of QueryParser.Operator.OR */
public static final Operator OR_OPERATOR = Operator.OR;
/** The actual operator that parser uses to combine query terms */
private int operator = DEFAULT_OPERATOR_OR;
private Operator operator = OR_OPERATOR;
/**
* Whether terms of wildcard and prefix queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
boolean lowercaseWildcardTerms = true;
boolean lowercaseExpandedTerms = true;
Analyzer analyzer;
String field;
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
* @param field the default field for query terms.
* @param analyzer used to find terms in the query text.
* @throws ParseException if the parsing fails
/** The default operator for parsing queries.
* Use {@link QueryParser#setDefaultOperator} to change it.
*/
static public Query parse(String query, String field, Analyzer analyzer)
throws ParseException {
QueryParser parser = new QueryParser(field, analyzer);
return parser.parse(query);
static public final class Operator extends Parameter {
private Operator(String name) {
super(name);
}
static public final Operator OR = new Operator("OR");
static public final Operator AND = new Operator("AND");
}
/** Constructs a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -121,8 +135,7 @@ public class QueryParser {
field = f;
}
/** Parses a query string, returning a
* <a href="lucene.search.Query.html">Query</a>.
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
@@ -154,18 +167,36 @@ public class QueryParser {
}
/**
* Get the default minimal similarity for fuzzy queries.
* Get the minimal similarity for fuzzy queries.
*/
public float getFuzzyMinSim() {
return fuzzyMinSim;
}
/**
*Set the default minimum similarity for fuzzy queries.
* Set the minimum similarity for fuzzy queries.
* Default is 0.5f.
*/
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
/**
* Get the prefix length for fuzzy queries.
* @return Returns the fuzzyPrefixLength.
*/
public int getFuzzyPrefixLength() {
return fuzzyPrefixLength;
}
/**
* Set the prefix length for fuzzy queries. Default is 0.
* @param fuzzyPrefixLength The fuzzyPrefixLength to set.
*/
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero.
@@ -181,32 +212,43 @@ public class QueryParser {
return phraseSlop;
}
/**
* Sets the boolean operator of the QueryParser.
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
public void setOperator(int operator) {
this.operator = operator;
}
/**
* Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
* or DEFAULT_OPERATOR_OR.
* Sets the boolean operator of the QueryParser.
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/>
* In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
public int getOperator() {
public void setDefaultOperator(Operator op) {
this.operator = op;
}
/**
* Gets implicit operator setting, which will be either AND_OPERATOR
* or OR_OPERATOR.
*/
public Operator getDefaultOperator() {
return operator;
}
public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
this.lowercaseWildcardTerms = lowercaseWildcardTerms;
/**
* Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
* lower-cased or not. Default is <code>true</code>.
*/
public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}
public boolean getLowercaseWildcardTerms() {
return lowercaseWildcardTerms;
/**
* @see #setLowercaseExpandedTerms(boolean)
*/
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
}
/**
@@ -223,25 +265,25 @@ public class QueryParser {
return locale;
}
protected void addClause(Vector clauses, int conj, int mods, Query q) {
protected void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
// unless it's already prohibited
if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.prohibited)
c.required = true;
if (!c.isProhibited())
c.setOccur(BooleanClause.Occur.MUST);
}
if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.prohibited)
c.required = false;
if (!c.isProhibited())
c.setOccur(BooleanClause.Occur.SHOULD);
}
// We might have been passed a null query; the term might have been
@@ -249,7 +291,7 @@ public class QueryParser {
if (q == null)
return;
if (operator == DEFAULT_OPERATOR_OR) {
if (operator == OR_OPERATOR) {
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both.
prohibited = (mods == MOD_NOT);
@@ -263,22 +305,16 @@ public class QueryParser {
prohibited = (mods == MOD_NOT);
required = (!prohibited && conj != CONJ_OR);
}
clauses.addElement(new BooleanClause(q, required, prohibited));
if (required && !prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
else if (!required && !prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
else if (!required && prohibited)
clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
else
throw new RuntimeException("Clause cannot be both required and prohibited");
}
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText) throws ParseException {
return getFieldQuery(field, queryText);
}
/**
* @exception ParseException throw in overridden method to disallow
@@ -287,10 +323,11 @@ public class QueryParser {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
TokenStream source = analyzer.tokenStream(field,
new StringReader(queryText));
TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
Vector v = new Vector();
org.apache.lucene.analysis.Token t;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
while (true) {
try {
@@ -301,7 +338,11 @@ public class QueryParser {
}
if (t == null)
break;
v.addElement(t.termText());
v.addElement(t);
if (t.getPositionIncrement() != 0)
positionCount += t.getPositionIncrement();
else
severalTokensAtSamePosition = true;
}
try {
source.close();
@@ -312,36 +353,52 @@ public class QueryParser {
if (v.size() == 0)
return null;
else if (v.size() == 1)
return new TermQuery(new Term(field, (String) v.elementAt(0)));
else {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i=0; i<v.size(); i++) {
q.add(new Term(field, (String) v.elementAt(i)));
else if (v.size() == 1) {
t = (org.apache.lucene.analysis.Token) v.elementAt(0);
return new TermQuery(new Term(field, t.termText()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery(
new Term(field, t.termText()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
// phrase query:
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, t.termText()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
return mpq;
}
}
else {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < v.size(); i++) {
q.add(new Term(field, ((org.apache.lucene.analysis.Token)
v.elementAt(i)).termText()));
}
return q;
}
return q;
}
}
/**
* Base implementation delegates to {@link #getFieldQuery(String, Analyzer, String)}.
* This method may be overwritten, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getFieldQuery(String, String, int)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field,
Analyzer analyzer,
String queryText,
int slop) throws ParseException {
return getFieldQuery(field, queryText, slop);
}
/**
* Base implementation delegates to {@link #getFieldQuery(String,String)}.
@@ -357,25 +414,13 @@ public class QueryParser {
if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop);
}
if (query instanceof MultiPhraseQuery) {
((MultiPhraseQuery) query).setSlop(slop);
}
return query;
}
/**
* Note that parameter analyzer is ignored. Calls inside the parser always
* use class member analyser. This method will be deprecated and substituted
* by {@link #getRangeQuery(String, String, String, boolean)} in future versions of Lucene.
* Currently overwriting either of these methods works.
*
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRangeQuery(String field,
Analyzer analyzer,
String part1,
String part2,
boolean inclusive) throws ParseException {
return getRangeQuery(field, part1, part2, inclusive);
}
/**
* @exception ParseException throw in overridden method to disallow
@@ -385,11 +430,27 @@ public class QueryParser {
String part2,
boolean inclusive) throws ParseException
{
if (lowercaseExpandedTerms) {
part1 = part1.toLowerCase();
part2 = part2.toLowerCase();
}
try {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
df.setLenient(true);
Date d1 = df.parse(part1);
Date d2 = df.parse(part2);
if (inclusive) {
// The user can only specify the date, not the time, so make sure
// the time is set to the latest possible time of that date to really
// include all documents:
Calendar cal = Calendar.getInstance(locale);
cal.setTime(d2);
cal.set(Calendar.HOUR_OF_DAY, 23);
cal.set(Calendar.MINUTE, 59);
cal.set(Calendar.SECOND, 59);
cal.set(Calendar.MILLISECOND, 999);
d2 = cal.getTime();
}
part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2);
}
@@ -413,10 +474,28 @@ public class QueryParser {
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses) throws ParseException
protected Query getBooleanQuery(Vector clauses) throws ParseException {
return getBooleanQuery(clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses Vector that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{
BooleanQuery query = new BooleanQuery();
query.
BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i));
}
@@ -446,8 +525,8 @@ public class QueryParser {
*/
protected Query getWildcardQuery(String field, String termStr) throws ParseException
{
if (lowercaseWildcardTerms) {
termStr = termStr.toLowerCase();
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new WildcardQuery(t);
@@ -455,7 +534,7 @@ public class QueryParser {
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses an input term
* {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily,
@@ -478,16 +557,17 @@ public class QueryParser {
*/
protected Query getPrefixQuery(String field, String termStr) throws ParseException
{
if (lowercaseWildcardTerms) {
termStr = termStr.toLowerCase();
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new PrefixQuery(t);
}
/**
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses
* {@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~) appended.
*
* @param field Name of the field query will use.
@@ -496,26 +576,13 @@ public class QueryParser {
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
return getFuzzyQuery(field, termStr, fuzzyMinSim);
}
/**
* Factory method for generating a query (similar to
* ({@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~floatNumber) appended.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
* @param minSimilarity the minimum similarity required for a fuzzy match
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new FuzzyQuery(t, minSimilarity);
return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
}
/**
@@ -536,7 +603,7 @@ public class QueryParser {
/**
* Returns a String where those characters that QueryParser
* expects to be escaped are escaped, i.e. preceded by a <code>\</code>.
* expects to be escaped are escaped by a preceding <code>\</code>.
*/
public static String escape(String s) {
StringBuffer sb = new StringBuffer();
@@ -553,7 +620,16 @@ public class QueryParser {
return sb.toString();
}
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
@@ -580,17 +656,18 @@ PARSER_END(QueryParser)
}
<DEFAULT, RangeIn, RangeEx> SKIP : {
<<_WHITESPACE>>
< <_WHITESPACE>>
}
// OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
// http://issues.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from:
//
// | <WILDTERM: <_TERM_START_CHAR>
// (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To:
//
// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
<DEFAULT> TOKEN : {
<AND: ("AND" | "&&") >
@@ -606,8 +683,7 @@ PARSER_END(QueryParser)
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: (<_TERM_START_CHAR> | ( [ "*", "?" ] ))
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
@@ -746,12 +822,9 @@ Query Term(String field) : {
if(fms < 0.0f || fms > 1.0f){
throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
}
if(fms == fuzzyMinSim)
q = getFuzzyQuery(field, termImage);
else
q = getFuzzyQuery(field, termImage, fms);
q = getFuzzyQuery(field, termImage,fms);
} else {
q = getFieldQuery(field, analyzer, termImage);
q = getFieldQuery(field, termImage);
}
}
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
@@ -769,7 +842,7 @@ Query Term(String field) : {
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
q = getRangeQuery(field, goop1.image, goop2.image, true);
}
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
[ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
@@ -787,7 +860,7 @@ Query Term(String field) : {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
q = getRangeQuery(field, goop1.image, goop2.image, false);
}
| term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ]
@@ -801,7 +874,7 @@ Query Term(String field) : {
}
catch (Exception ignored) { }
}
q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
}
)
{

View File

@@ -1,5 +1,14 @@
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.alfresco.repo.search.impl.lucene;
import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;
public class QueryParserTokenManager implements QueryParserConstants
{
@@ -937,12 +946,10 @@ protected CharStream input_stream;
private final int[] jjrounds = new int[34];
private final int[] jjstateSet = new int[68];
protected char curChar;
public QueryParserTokenManager(CharStream stream)
{
public QueryParserTokenManager(CharStream stream){
input_stream = stream;
}
public QueryParserTokenManager(CharStream stream, int lexState)
{
public QueryParserTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}

View File

@@ -1,21 +1,4 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
package org.alfresco.repo.search.impl.lucene;
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */package org.alfresco.repo.search.impl.lucene;
/**
* Describes the input token stream.

View File

@@ -38,7 +38,7 @@ public class TokenMgrError extends Error
* equivalents in the given string
*/
protected static final String addEscapes(String str) {
StringBuilder retval = new StringBuilder(str.length() + 8);
StringBuffer retval = new StringBuffer();
char ch;
for (int i = 0; i < str.length(); i++) {
switch (str.charAt(i))