mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-08-07 17:49:17 +00:00
Move lucene analysis into the DataModel project
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20975 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene;
|
||||
|
||||
/**
 * The way in which a field's text should be analysed when a token stream is requested.
 * <p>
 * The constant order is part of the public contract (ordinals and {@code values()} order) and must not change.
 */
public enum AnalysisMode
{
    /** No specific mode requested. */
    DEFAULT,
    /** Tokenised analysis requested. */
    TOKENISE,
    /** Identifier (verbatim, untokenised) analysis requested. */
    IDENTIFIER,
    /** Presumably analysis for fuzzy matching - TODO confirm against callers. */
    FUZZY,
    /** Presumably analysis for prefix matching - TODO confirm against callers. */
    PREFIX,
    /** Presumably analysis for wildcard matching - TODO confirm against callers. */
    WILD,
    /** Presumably analysis for LIKE-style matching - TODO confirm against callers. */
    LIKE;
}
|
@@ -1,328 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.dictionary.IndexTokenisationMode;
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimMLAnalayser;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
|
||||
/**
|
||||
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser
|
||||
* should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false
|
||||
* when adding the field to the document)
|
||||
*
|
||||
* @author andyh
|
||||
*/
|
||||
|
||||
public class LuceneAnalyser extends Analyzer
|
||||
{
|
||||
private static Log s_logger = LogFactory.getLog(LuceneAnalyser.class);
|
||||
|
||||
// Dictinary service to look up analyser classes by data type and locale.
|
||||
private DictionaryService dictionaryService;
|
||||
|
||||
// If all else fails a fall back analyser
|
||||
private Analyzer defaultAnalyser;
|
||||
|
||||
// Cached analysers for non ML data types.
|
||||
private Map<String, Analyzer> analysers = new HashMap<String, Analyzer>();
|
||||
|
||||
private MLAnalysisMode mlAlaysisMode;
|
||||
|
||||
/**
|
||||
* Constructs with a default standard analyser
|
||||
*
|
||||
* @param defaultAnalyzer
|
||||
* Any fields not specifically defined to use a different analyzer will use the one provided here.
|
||||
*/
|
||||
public LuceneAnalyser(DictionaryService dictionaryService, MLAnalysisMode mlAlaysisMode)
|
||||
{
|
||||
this(new AlfrescoStandardAnalyser());
|
||||
this.dictionaryService = dictionaryService;
|
||||
this.mlAlaysisMode = mlAlaysisMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs with default analyzer.
|
||||
*
|
||||
* @param defaultAnalyzer
|
||||
* Any fields not specifically defined to use a different analyzer will use the one provided here.
|
||||
*/
|
||||
public LuceneAnalyser(Analyzer defaultAnalyser)
|
||||
{
|
||||
this.defaultAnalyser = defaultAnalyser;
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader, AnalysisMode analysisMode)
|
||||
{
|
||||
Analyzer analyser = (Analyzer) analysers.get(fieldName);
|
||||
if (analyser == null)
|
||||
{
|
||||
analyser = findAnalyser(fieldName, analysisMode);
|
||||
}
|
||||
return analyser.tokenStream(fieldName, reader);
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return tokenStream(fieldName, reader, AnalysisMode.DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Pick the analyser from the field name
|
||||
*
|
||||
* @param fieldName
|
||||
* @return
|
||||
*/
|
||||
private Analyzer findAnalyser(String fieldName, AnalysisMode analysisMode)
|
||||
{
|
||||
Analyzer analyser;
|
||||
if (fieldName.equals("PATH"))
|
||||
{
|
||||
analyser = new PathAnalyser();
|
||||
}
|
||||
else if (fieldName.equals("QNAME"))
|
||||
{
|
||||
analyser = new PathAnalyser();
|
||||
}
|
||||
else if (fieldName.equals("PRIMARYASSOCTYPEQNAME"))
|
||||
{
|
||||
analyser = new PathAnalyser();
|
||||
}
|
||||
else if (fieldName.equals("ASSOCTYPEQNAME"))
|
||||
{
|
||||
analyser = new PathAnalyser();
|
||||
}
|
||||
else if (fieldName.equals("TYPE"))
|
||||
{
|
||||
throw new UnsupportedOperationException("TYPE must not be tokenised");
|
||||
}
|
||||
else if (fieldName.equals("ASPECT"))
|
||||
{
|
||||
throw new UnsupportedOperationException("ASPECT must not be tokenised");
|
||||
}
|
||||
else if (fieldName.equals("ANCESTOR"))
|
||||
{
|
||||
analyser = new WhitespaceAnalyzer();
|
||||
}
|
||||
else if (fieldName.startsWith("@"))
|
||||
{
|
||||
if (fieldName.endsWith(".mimetype"))
|
||||
{
|
||||
analyser = new VerbatimAnalyser();
|
||||
}
|
||||
else if (fieldName.endsWith(".size"))
|
||||
{
|
||||
analyser = new LongAnalyser();
|
||||
}
|
||||
else if (fieldName.endsWith(".locale"))
|
||||
{
|
||||
analyser = new VerbatimAnalyser(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
QName propertyQName = QName.createQName(fieldName.substring(1));
|
||||
// Temporary fix for person and user uids
|
||||
|
||||
if (propertyQName.equals(ContentModel.PROP_USER_USERNAME)
|
||||
|| propertyQName.equals(ContentModel.PROP_USERNAME) || propertyQName.equals(ContentModel.PROP_AUTHORITY_NAME))
|
||||
{
|
||||
analyser = new VerbatimAnalyser(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
|
||||
IndexTokenisationMode tokenise = IndexTokenisationMode.TRUE;
|
||||
if (propertyDef != null)
|
||||
{
|
||||
DataTypeDefinition dataType = propertyDef.getDataType();
|
||||
tokenise = propertyDef.getIndexTokenisationMode();
|
||||
if (tokenise == null)
|
||||
{
|
||||
tokenise = IndexTokenisationMode.TRUE;
|
||||
}
|
||||
switch (tokenise)
|
||||
{
|
||||
case TRUE:
|
||||
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else if (dataType.getName().equals(DataTypeDefinition.MLTEXT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, mlAlaysisMode);
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = loadAnalyzer(dataType);
|
||||
}
|
||||
break;
|
||||
case BOTH:
|
||||
switch (analysisMode)
|
||||
{
|
||||
case DEFAULT:
|
||||
case TOKENISE:
|
||||
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else if (dataType.getName().equals(DataTypeDefinition.MLTEXT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, mlAlaysisMode);
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = loadAnalyzer(dataType);
|
||||
}
|
||||
break;
|
||||
case IDENTIFIER:
|
||||
if (dataType.getName().equals(DataTypeDefinition.MLTEXT))
|
||||
{
|
||||
analyser = new VerbatimMLAnalayser(mlAlaysisMode);
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = new VerbatimAnalyser();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new UnsupportedOperationException("TYPE must not be tokenised");
|
||||
}
|
||||
|
||||
break;
|
||||
case FALSE:
|
||||
// TODO: MLText verbatim analyser
|
||||
analyser = new VerbatimAnalyser();
|
||||
break;
|
||||
default:
|
||||
throw new UnsupportedOperationException("TYPE must not be tokenised");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (analysisMode)
|
||||
{
|
||||
case IDENTIFIER:
|
||||
analyser = new VerbatimAnalyser();
|
||||
break;
|
||||
case DEFAULT:
|
||||
case TOKENISE:
|
||||
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
|
||||
analyser = loadAnalyzer(dataType);
|
||||
break;
|
||||
default:
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = defaultAnalyser;
|
||||
}
|
||||
analysers.put(fieldName, analyser);
|
||||
return analyser;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find an instantiate an analyser. The shuld all be thread sade as Analyser.tokenStream should be re-entrant.
|
||||
*
|
||||
* @param dataType
|
||||
* @return
|
||||
*/
|
||||
private Analyzer loadAnalyzer(DataTypeDefinition dataType)
|
||||
{
|
||||
String analyserClassName = dataType.getAnalyserClassName().trim();
|
||||
try
|
||||
{
|
||||
Class<?> clazz = Class.forName(analyserClassName);
|
||||
Analyzer analyser = (Analyzer) clazz.newInstance();
|
||||
if (s_logger.isDebugEnabled())
|
||||
{
|
||||
s_logger.debug("Loaded " + analyserClassName + " for type " + dataType.getName());
|
||||
}
|
||||
return analyser;
|
||||
}
|
||||
catch (ClassNotFoundException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using " + analyserClassName);
|
||||
}
|
||||
catch (InstantiationException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using " + analyserClassName);
|
||||
}
|
||||
catch (IllegalAccessException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using " + analyserClassName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different
|
||||
* languages etc.
|
||||
*/
|
||||
@Override
|
||||
public int getPositionIncrementGap(String fieldName)
|
||||
{
|
||||
if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype"))
|
||||
{
|
||||
QName propertyQName = QName.createQName(fieldName.substring(1));
|
||||
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
|
||||
if (propertyDef != null)
|
||||
{
|
||||
if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT))
|
||||
{
|
||||
return 1000;
|
||||
}
|
||||
}
|
||||
}
|
||||
return super.getPositionIncrementGap(fieldName);
|
||||
}
|
||||
|
||||
}
|
@@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.ISOLatin1AccentFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
|
||||
public class AlfrescoStandardAnalyser extends Analyzer
|
||||
{
|
||||
private Set stopSet;
|
||||
|
||||
/**
|
||||
* An array containing some common English words that are usually not useful for searching.
|
||||
*/
|
||||
public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
|
||||
|
||||
/** Builds an analyzer. */
|
||||
public AlfrescoStandardAnalyser()
|
||||
{
|
||||
this(STOP_WORDS);
|
||||
}
|
||||
|
||||
/** Builds an analyzer with the given stop words. */
|
||||
public AlfrescoStandardAnalyser(String[] stopWords)
|
||||
{
|
||||
stopSet = StopFilter.makeStopSet(stopWords);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
|
||||
*/
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
TokenStream result = new StandardTokenizer(reader);
|
||||
result = new AlfrescoStandardFilter(result);
|
||||
result = new LowerCaseFilter(result);
|
||||
result = new StopFilter(result, stopSet);
|
||||
result = new ISOLatin1AccentFilter(result);
|
||||
return result;
|
||||
}
|
||||
}
|
@@ -1,140 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
public class AlfrescoStandardFilter extends TokenFilter
|
||||
{
|
||||
|
||||
/** Construct filtering <i>in</i>. */
|
||||
public AlfrescoStandardFilter(TokenStream in)
|
||||
{
|
||||
super(in);
|
||||
}
|
||||
|
||||
private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
|
||||
|
||||
private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
|
||||
|
||||
private static final String HOST_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST];
|
||||
|
||||
private static final String ALPHANUM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
|
||||
|
||||
private Queue<org.apache.lucene.analysis.Token> hostTokens = null;
|
||||
|
||||
/**
|
||||
* Returns the next token in the stream, or null at EOS.
|
||||
* <p>
|
||||
* Removes <tt>'s</tt> from the end of words.
|
||||
* <p>
|
||||
* Removes dots from acronyms.
|
||||
* <p>
|
||||
* Splits host names ...
|
||||
*/
|
||||
public final org.apache.lucene.analysis.Token next() throws java.io.IOException
|
||||
{
|
||||
if (hostTokens == null)
|
||||
{
|
||||
org.apache.lucene.analysis.Token t = input.next();
|
||||
|
||||
if (t == null)
|
||||
return null;
|
||||
|
||||
String text = t.termText();
|
||||
String type = t.type();
|
||||
|
||||
if (type == APOSTROPHE_TYPE && // remove 's
|
||||
(text.endsWith("'s") || text.endsWith("'S")))
|
||||
{
|
||||
return new org.apache.lucene.analysis.Token(text.substring(0, text.length() - 2), t.startOffset(), t
|
||||
.endOffset(), type);
|
||||
|
||||
}
|
||||
else if (type == ACRONYM_TYPE)
|
||||
{ // remove dots
|
||||
StringBuffer trimmed = new StringBuffer();
|
||||
for (int i = 0; i < text.length(); i++)
|
||||
{
|
||||
char c = text.charAt(i);
|
||||
if (c != '.')
|
||||
trimmed.append(c);
|
||||
}
|
||||
return new org.apache.lucene.analysis.Token(trimmed.toString(), t.startOffset(), t.endOffset(), type);
|
||||
|
||||
}
|
||||
else if (type == HOST_TYPE)
|
||||
{
|
||||
// <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
|
||||
// There must be at least two tokens ....
|
||||
hostTokens = new LinkedList<org.apache.lucene.analysis.Token>();
|
||||
StringTokenizer tokeniser = new StringTokenizer(text, ".");
|
||||
int start = t.startOffset();
|
||||
int end;
|
||||
while (tokeniser.hasMoreTokens())
|
||||
{
|
||||
String token = tokeniser.nextToken();
|
||||
end = start + token.length();
|
||||
hostTokens.offer(new org.apache.lucene.analysis.Token(token, start, end, ALPHANUM_TYPE));
|
||||
start = end + 1;
|
||||
}
|
||||
// check if we have an acronym ..... yes a.b.c ends up here ...
|
||||
|
||||
if (text.length() == hostTokens.size() * 2 - 1)
|
||||
{
|
||||
hostTokens = null;
|
||||
// acronym
|
||||
StringBuffer trimmed = new StringBuffer();
|
||||
for (int i = 0; i < text.length(); i++)
|
||||
{
|
||||
char c = text.charAt(i);
|
||||
if (c != '.')
|
||||
trimmed.append(c);
|
||||
}
|
||||
return new org.apache.lucene.analysis.Token(trimmed.toString(), t.startOffset(), t.endOffset(),
|
||||
ALPHANUM_TYPE);
|
||||
}
|
||||
else
|
||||
{
|
||||
return hostTokens.remove();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return t;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
org.apache.lucene.analysis.Token token = hostTokens.remove();
|
||||
if (hostTokens.isEmpty())
|
||||
{
|
||||
hostTokens = null;
|
||||
}
|
||||
return token;
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,55 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
|
||||
public class AnalysisException extends AlfrescoRuntimeException
|
||||
{
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -7722380192490118459L;
|
||||
|
||||
public AnalysisException(String msgId)
|
||||
{
|
||||
super(msgId);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
public AnalysisException(String msgId, Object[] msgParams)
|
||||
{
|
||||
super(msgId, msgParams);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
public AnalysisException(String msgId, Throwable cause)
|
||||
{
|
||||
super(msgId, cause);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
public AnalysisException(String msgId, Object[] msgParams, Throwable cause)
|
||||
{
|
||||
super(msgId, msgParams, cause);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
}
|
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* @author andyh
|
||||
*
|
||||
* TODO To change the template for this generated type comment go to Window -
|
||||
* Preferences - Java - Code Style - Code Templates
|
||||
*/
|
||||
public class CategoryAnalyser extends Analyzer
|
||||
{
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
|
||||
* java.io.Reader)
|
||||
*/
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
|
||||
PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
|
||||
PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, false);
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class DanishSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public DanishSnowballAnalyser()
|
||||
{
|
||||
super("Danish");
|
||||
}
|
||||
}
|
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class DateAnalyser extends Analyzer
|
||||
{
|
||||
|
||||
public DateAnalyser()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
// Split at the T in the XML date form
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new DateTokenFilter(reader);
|
||||
}
|
||||
}
|
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class DateTimeAnalyser extends Analyzer
|
||||
{
|
||||
|
||||
public DateTimeAnalyser()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
// Split at the T in the XML date form
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new DateTimeTokenFilter(reader);
|
||||
}
|
||||
}
|
@@ -1,191 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.alfresco.util.CachingDateFormat;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
* @author andyh
|
||||
*/
|
||||
public class DateTimeTokenFilter extends Tokenizer
|
||||
{
|
||||
Tokenizer baseTokeniser;
|
||||
|
||||
Iterator<Token> tokenIterator = null;
|
||||
|
||||
public DateTimeTokenFilter(Reader in)
|
||||
{
|
||||
super(in);
|
||||
baseTokeniser = new WhitespaceTokenizer(in);
|
||||
}
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
if (tokenIterator == null)
|
||||
{
|
||||
buildIterator();
|
||||
}
|
||||
if (tokenIterator.hasNext())
|
||||
{
|
||||
return tokenIterator.next();
|
||||
}
|
||||
else
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public void buildIterator() throws IOException
|
||||
{
|
||||
Token candidate;
|
||||
ArrayList<Token> tokens = new ArrayList<Token>();
|
||||
while ((candidate = baseTokeniser.next()) != null)
|
||||
{
|
||||
Date date;
|
||||
if (candidate.termText().equalsIgnoreCase("now"))
|
||||
{
|
||||
date = new Date();
|
||||
}
|
||||
else if (candidate.termText().equalsIgnoreCase("today"))
|
||||
{
|
||||
date = new Date();
|
||||
Calendar cal = Calendar.getInstance();
|
||||
cal.setTime(date);
|
||||
cal.set(Calendar.HOUR_OF_DAY, cal.getMinimum(Calendar.HOUR_OF_DAY));
|
||||
cal.set(Calendar.MINUTE, cal.getMinimum(Calendar.MINUTE));
|
||||
cal.set(Calendar.SECOND, cal.getMinimum(Calendar.SECOND));
|
||||
cal.set(Calendar.MILLISECOND, cal.getMinimum(Calendar.MILLISECOND));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
date = CachingDateFormat.lenientParse(candidate.termText());
|
||||
}
|
||||
catch (ParseException e)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Calendar cal = Calendar.getInstance();
|
||||
cal.setTime(date);
|
||||
|
||||
Token token;
|
||||
|
||||
// four digits
|
||||
token = new Token("YE" + cal.get(Calendar.YEAR), candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
|
||||
// 2 digits
|
||||
int month = cal.get(Calendar.MONTH);
|
||||
if (month < 10)
|
||||
{
|
||||
token = new Token("MO0" + month, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else
|
||||
{
|
||||
token = new Token("MO" + month, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
|
||||
int day = cal.get(Calendar.DAY_OF_MONTH);
|
||||
if (day < 10)
|
||||
{
|
||||
token = new Token("DA0" + day, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else
|
||||
{
|
||||
token = new Token("DA" + day, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
|
||||
int hour = cal.get(Calendar.HOUR_OF_DAY);
|
||||
if (hour < 10)
|
||||
{
|
||||
token = new Token("HO0" + hour, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else
|
||||
{
|
||||
token = new Token("HO" + hour, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
|
||||
int minute = cal.get(Calendar.MINUTE);
|
||||
if (minute < 10)
|
||||
{
|
||||
token = new Token("MI0" + minute, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else
|
||||
{
|
||||
token = new Token("MI" + minute, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
|
||||
int second = cal.get(Calendar.SECOND);
|
||||
if (second < 10)
|
||||
{
|
||||
token = new Token("SE0" + second, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else
|
||||
{
|
||||
token = new Token("SE" + second, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
|
||||
int millis = cal.get(Calendar.MILLISECOND);
|
||||
if (millis < 10)
|
||||
{
|
||||
token = new Token("MS00" + millis, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else if (millis < 100)
|
||||
{
|
||||
token = new Token("MS0" + millis, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
else
|
||||
{
|
||||
token = new Token("MS" + millis, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
tokens.add(token);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
tokenIterator = tokens.iterator();
|
||||
}
|
||||
}
|
@@ -1,85 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
||||
import org.alfresco.util.CachingDateFormat;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
* @author andyh
|
||||
*/
|
||||
public class DateTokenFilter extends Tokenizer
|
||||
{
|
||||
Tokenizer baseTokeniser;
|
||||
|
||||
public DateTokenFilter(Reader in)
|
||||
{
|
||||
super(in);
|
||||
baseTokeniser = new WhitespaceTokenizer(in);
|
||||
}
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
SimpleDateFormat dof = CachingDateFormat.getDateOnlyFormat();
|
||||
Token candidate;
|
||||
while ((candidate = baseTokeniser.next()) != null)
|
||||
{
|
||||
Date date;
|
||||
if (candidate.termText().equalsIgnoreCase("now"))
|
||||
{
|
||||
date = new Date();
|
||||
}
|
||||
else if (candidate.termText().equalsIgnoreCase("today"))
|
||||
{
|
||||
date = new Date();
|
||||
Calendar cal = Calendar.getInstance();
|
||||
cal.setTime(date);
|
||||
cal.set(Calendar.HOUR_OF_DAY, cal.getMinimum(Calendar.HOUR_OF_DAY));
|
||||
cal.set(Calendar.MINUTE, cal.getMinimum(Calendar.MINUTE));
|
||||
cal.set(Calendar.SECOND, cal.getMinimum(Calendar.SECOND));
|
||||
cal.set(Calendar.MILLISECOND, cal.getMinimum(Calendar.MILLISECOND));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
date = CachingDateFormat.lenientParse(candidate.termText());
|
||||
}
|
||||
catch (ParseException e)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
String valueString = dof.format(date);
|
||||
Token integerToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(), candidate.type());
|
||||
return integerToken;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
/**
|
||||
* Simple analyser to wrap the tokenisation of doubles.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class DoubleAnalyser extends Analyzer
|
||||
{
|
||||
|
||||
public DoubleAnalyser()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new DoubleTokenFilter(reader);
|
||||
}
|
||||
}
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
* Simple tokeniser for doubles.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class DoubleTokenFilter extends Tokenizer
|
||||
{
|
||||
Tokenizer baseTokeniser;
|
||||
|
||||
public DoubleTokenFilter(Reader in)
|
||||
{
|
||||
super(in);
|
||||
baseTokeniser = new WhitespaceTokenizer(in);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.analysis.TokenStream#next()
|
||||
*/
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
Token candidate;
|
||||
while((candidate = baseTokeniser.next()) != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
Double d = Double.valueOf(candidate.termText());
|
||||
String valueString = NumericEncoder.encode(d.doubleValue());
|
||||
Token doubleToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(),
|
||||
candidate.type());
|
||||
return doubleToken;
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
// just ignore and try the next one
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class DutchSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public DutchSnowballAnalyser()
|
||||
{
|
||||
super("Dutch");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class EnglishSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public EnglishSnowballAnalyser()
|
||||
{
|
||||
super("English");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class FinnishSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public FinnishSnowballAnalyser()
|
||||
{
|
||||
super("Finnish");
|
||||
}
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* Simple analyser for floats.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class FloatAnalyser extends Analyzer
|
||||
{
|
||||
|
||||
public FloatAnalyser()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new FloatTokenFilter(reader);
|
||||
}
|
||||
}
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
* Simple tokeniser for floats.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class FloatTokenFilter extends Tokenizer
|
||||
{
|
||||
Tokenizer baseTokeniser;
|
||||
|
||||
public FloatTokenFilter(Reader in)
|
||||
{
|
||||
super(in);
|
||||
baseTokeniser = new WhitespaceTokenizer(in);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.analysis.TokenStream#next()
|
||||
*/
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
Token candidate;
|
||||
while((candidate = baseTokeniser.next()) != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
Float floatValue = Float.valueOf(candidate.termText());
|
||||
String valueString = NumericEncoder.encode(floatValue.floatValue());
|
||||
Token floatToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(),
|
||||
candidate.type());
|
||||
return floatToken;
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
// just ignore and try the next one
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class FrenchSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public FrenchSnowballAnalyser()
|
||||
{
|
||||
super("French");
|
||||
}
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.ISOLatin1AccentFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class FrenchSnowballAnalyserThatRemovesAccents extends Analyzer
|
||||
{
|
||||
Analyzer analyzer = new FrenchSnowballAnalyser();
|
||||
|
||||
public FrenchSnowballAnalyserThatRemovesAccents()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
TokenStream result = analyzer.tokenStream(fieldName, reader);
|
||||
result = new ISOLatin1AccentFilter(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class German2SnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public German2SnowballAnalyser()
|
||||
{
|
||||
super("German2");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class GermanSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public GermanSnowballAnalyser()
|
||||
{
|
||||
super("German");
|
||||
}
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* Simple analyser for integers.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class IntegerAnalyser extends Analyzer
|
||||
{
|
||||
|
||||
public IntegerAnalyser()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new IntegerTokenFilter(reader);
|
||||
}
|
||||
}
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
* Simple tokeniser for integers.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class IntegerTokenFilter extends Tokenizer
|
||||
{
|
||||
Tokenizer baseTokeniser;
|
||||
|
||||
public IntegerTokenFilter(Reader in)
|
||||
{
|
||||
super(in);
|
||||
baseTokeniser = new WhitespaceTokenizer(in);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.analysis.TokenStream#next()
|
||||
*/
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
Token candidate;
|
||||
while((candidate = baseTokeniser.next()) != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
Integer integer = Integer.valueOf(candidate.termText());
|
||||
String valueString = NumericEncoder.encode(integer.intValue());
|
||||
Token integerToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(),
|
||||
candidate.type());
|
||||
return integerToken;
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
// just ignore and try the next one
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class ItalianSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public ItalianSnowballAnalyser()
|
||||
{
|
||||
super("Italian");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class KPSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public KPSnowballAnalyser()
|
||||
{
|
||||
super("Kp");
|
||||
}
|
||||
}
|
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* Simple analyser for longs.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class LongAnalyser extends Analyzer
|
||||
{
|
||||
|
||||
public LongAnalyser()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new LongTokenFilter(reader);
|
||||
}
|
||||
}
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
* Simple tokeniser for longs.
|
||||
*
|
||||
* @author Andy Hind
|
||||
*/
|
||||
public class LongTokenFilter extends Tokenizer
|
||||
{
|
||||
Tokenizer baseTokeniser;
|
||||
|
||||
public LongTokenFilter(Reader in)
|
||||
{
|
||||
super(in);
|
||||
baseTokeniser = new WhitespaceTokenizer(in);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.analysis.TokenStream#next()
|
||||
*/
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
Token candidate;
|
||||
while((candidate = baseTokeniser.next()) != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
Long longValue = Long.valueOf(candidate.termText());
|
||||
String valueString = NumericEncoder.encode(longValue.longValue());
|
||||
Token longToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(),
|
||||
candidate.type());
|
||||
return longToken;
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
// just ignore and try the next one
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class LovinsSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public LovinsSnowballAnalyser()
|
||||
{
|
||||
super("Lovins");
|
||||
}
|
||||
}
|
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
public class LowerCaseVerbatimAnalyser extends VerbatimAnalyser
|
||||
{
|
||||
public LowerCaseVerbatimAnalyser()
|
||||
{
|
||||
super(true);
|
||||
}
|
||||
}
|
@@ -1,213 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.springframework.extensions.surf.util.I18NUtil;
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class MLAnalayser extends Analyzer
|
||||
{
|
||||
private static Log s_logger = LogFactory.getLog(MLAnalayser.class);
|
||||
|
||||
private DictionaryService dictionaryService;
|
||||
|
||||
private HashMap<Locale, Analyzer> analysers = new HashMap<Locale, Analyzer>();
|
||||
|
||||
private MLAnalysisMode mlAnalaysisMode;
|
||||
|
||||
public MLAnalayser(DictionaryService dictionaryService, MLAnalysisMode mlAnalaysisMode)
|
||||
{
|
||||
this.dictionaryService = dictionaryService;
|
||||
this.mlAnalaysisMode = mlAnalaysisMode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
// We use read ahead to get the language info - if this does not exist we need to restart
|
||||
// an use the default - there foer we need mark and restore.
|
||||
|
||||
if (!(reader instanceof BufferedReader))
|
||||
{
|
||||
BufferedReader breader = new BufferedReader(reader);
|
||||
try
|
||||
{
|
||||
if (!breader.markSupported())
|
||||
{
|
||||
throw new AnalysisException(
|
||||
"Multilingual tokenisation requires a reader that supports marks and reset");
|
||||
}
|
||||
breader.mark(100);
|
||||
StringBuilder builder = new StringBuilder();
|
||||
if (breader.read() == '\u0000')
|
||||
{
|
||||
String language = "";
|
||||
String country = "";
|
||||
String varient = "";
|
||||
char c;
|
||||
int count = 0;
|
||||
while ((c = (char) breader.read()) != '\u0000')
|
||||
{
|
||||
if (count++ > 99)
|
||||
{
|
||||
breader.reset();
|
||||
return getDefaultAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
if (c == '_')
|
||||
{
|
||||
if (language.length() == 0)
|
||||
{
|
||||
language = builder.toString();
|
||||
}
|
||||
else if (country.length() == 0)
|
||||
{
|
||||
country = builder.toString();
|
||||
}
|
||||
else if (varient.length() == 0)
|
||||
{
|
||||
varient = builder.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
breader.reset();
|
||||
return getDefaultAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
builder = new StringBuilder();
|
||||
}
|
||||
else
|
||||
{
|
||||
builder.append(c);
|
||||
}
|
||||
}
|
||||
if (builder.length() > 0)
|
||||
{
|
||||
if (language.length() == 0)
|
||||
{
|
||||
language = builder.toString();
|
||||
}
|
||||
else if (country.length() == 0)
|
||||
{
|
||||
country = builder.toString();
|
||||
}
|
||||
else if (varient.length() == 0)
|
||||
{
|
||||
varient = builder.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
breader.reset();
|
||||
return getDefaultAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
}
|
||||
Locale locale = new Locale(language, country, varient);
|
||||
// leave the reader where it is ....
|
||||
return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode);
|
||||
}
|
||||
else
|
||||
{
|
||||
breader.reset();
|
||||
return getDefaultAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
try
|
||||
{
|
||||
breader.reset();
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new AnalysisException("Failed to reset buffered reader - token stream will be invalid", e);
|
||||
}
|
||||
return getDefaultAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new AnalysisException("Multilingual tokenisation requires a buffered reader");
|
||||
}
|
||||
}
|
||||
|
||||
private Analyzer getDefaultAnalyser()
|
||||
{
|
||||
return getAnalyser(I18NUtil.getLocale());
|
||||
}
|
||||
|
||||
private Analyzer getAnalyser(Locale locale)
|
||||
{
|
||||
Analyzer analyser = (Analyzer) analysers.get(locale);
|
||||
if (analyser == null)
|
||||
{
|
||||
analyser = findAnalyser(locale);
|
||||
}
|
||||
// wrap analyser to produce plain and prefixed tokens
|
||||
return analyser;
|
||||
}
|
||||
|
||||
private Analyzer findAnalyser(Locale locale)
|
||||
{
|
||||
Analyzer analyser = loadAnalyzer(locale);
|
||||
analysers.put(locale, analyser);
|
||||
return analyser;
|
||||
}
|
||||
|
||||
private Analyzer loadAnalyzer(Locale locale)
|
||||
{
|
||||
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
|
||||
String analyserClassName = dataType.getAnalyserClassName(locale);
|
||||
if (s_logger.isDebugEnabled())
|
||||
{
|
||||
s_logger.debug("Loading " + analyserClassName + " for " + locale);
|
||||
}
|
||||
try
|
||||
{
|
||||
Class<?> clazz = Class.forName(analyserClassName);
|
||||
Analyzer analyser = (Analyzer) clazz.newInstance();
|
||||
return analyser;
|
||||
}
|
||||
catch (ClassNotFoundException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to load analyser for property of type "
|
||||
+ dataType.getName() + " using " + analyserClassName);
|
||||
}
|
||||
catch (InstantiationException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to load analyser for property of type "
|
||||
+ dataType.getName() + " using " + analyserClassName);
|
||||
}
|
||||
catch (IllegalAccessException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to load analyser for property of type "
|
||||
+ dataType.getName() + " using " + analyserClassName);
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,148 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
/**
|
||||
* Create duplicate tokens for multilingual varients The forms are Tokens: Token - all languages {fr}Token - if a
|
||||
* language is specified {fr_CA}Token - if a language and country is specified {fr_CA_Varient}Token - for all three
|
||||
* {fr__Varient}Token - for a language varient with no country
|
||||
*
|
||||
* @author andyh
|
||||
*/
|
||||
public class MLTokenDuplicator extends Tokenizer
|
||||
{
|
||||
private static Log s_logger = LogFactory.getLog(MLTokenDuplicator.class);
|
||||
|
||||
TokenStream source;
|
||||
|
||||
Locale locale;
|
||||
|
||||
Iterator<Token> it;
|
||||
|
||||
HashSet<String> prefixes;
|
||||
|
||||
public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader, MLAnalysisMode mlAnalaysisMode)
|
||||
{
|
||||
super(reader);
|
||||
this.source = source;
|
||||
this.locale = locale;
|
||||
|
||||
Collection<Locale> locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false);
|
||||
prefixes = new HashSet<String>(locales.size());
|
||||
for(Locale toAdd : locales)
|
||||
{
|
||||
String localeString = toAdd.toString();
|
||||
if(localeString.length() == 0)
|
||||
{
|
||||
prefixes.add("");
|
||||
}
|
||||
else
|
||||
{
|
||||
StringBuilder builder = new StringBuilder(16);
|
||||
builder.append("{").append(localeString).append("}");
|
||||
prefixes.add(builder.toString());
|
||||
}
|
||||
}
|
||||
if(s_logger.isDebugEnabled())
|
||||
{
|
||||
s_logger.debug("Locale "+ locale +" using "+mlAnalaysisMode+" is "+prefixes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public MLTokenDuplicator(Locale locale, MLAnalysisMode mlAnalaysisMode)
|
||||
{
|
||||
this(null, locale, null, mlAnalaysisMode);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token next() throws IOException
|
||||
{
|
||||
Token t = null;
|
||||
if (it == null)
|
||||
{
|
||||
it = buildIterator();
|
||||
}
|
||||
if (it == null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
if (it.hasNext())
|
||||
{
|
||||
t = it.next();
|
||||
return t;
|
||||
}
|
||||
else
|
||||
{
|
||||
it = null;
|
||||
t = this.next();
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
private Iterator<Token> buildIterator() throws IOException
|
||||
{
|
||||
Token token = source.next();
|
||||
return buildIterator(token);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public Iterator<Token> buildIterator(Token token)
|
||||
{
|
||||
if (token == null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
ArrayList<Token> tokens = new ArrayList<Token>(prefixes.size());
|
||||
for (String prefix : prefixes)
|
||||
{
|
||||
Token newToken = new Token(prefix + token.termText(), token.startOffset(), token.endOffset(), token.type());
|
||||
if (tokens.size() == 0)
|
||||
{
|
||||
newToken.setPositionIncrement(token.getPositionIncrement());
|
||||
}
|
||||
else
|
||||
{
|
||||
newToken.setPositionIncrement(0);
|
||||
}
|
||||
tokens.add(newToken);
|
||||
}
|
||||
return tokens.iterator();
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class NorwegianSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public NorwegianSnowballAnalyser()
|
||||
{
|
||||
super("Norwegian");
|
||||
}
|
||||
}
|
@@ -1,228 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
/**
 * Support to encode numeric types in the lucene index.
 *
 * To support range queries in the lucene index numeric types need to be indexed
 * specially: the encoded strings must sort, by plain string comparison, in the
 * same order as the numbers they represent. This is done for int, long, float
 * and double.
 *
 * Every value is encoded as fixed width, zero padded, lower case hexadecimal (8
 * digits for int and float, 16 for long and double).
 *
 * For int and long only the sign bit is flipped, so negative numbers sort
 * before positive ones. For float and double the IEEE 754 bit pattern is split
 * into sign, exponent and mantissa; if the number is negative the exponent and
 * mantissa are XORed against their masks, which reverses their order so that
 * small negative numbers come after big negative numbers. The sign bit is then
 * flipped so that negative numbers start with 0 and positive numbers with 1.
 *
 * @author Andy Hind
 */
public class NumericEncoder
{
    /*
     * Constants for integer encoding
     */

    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */

    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding
     */

    static final int FLOAT_SIGN_MASK = 0x80000000;

    static final int FLOAT_EXPONENT_MASK = 0x7F800000;

    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding
     */

    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /** Number of hex digits in an encoded int or float. */
    private static final int INT_HEX_LENGTH = 8;

    /** Number of hex digits in an encoded long or double. */
    private static final int LONG_HEX_LENGTH = 16;

    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e',
            'f' };

    /** Mask selecting the lowest hex digit (4 bits). */
    private static final int MASK = (1 << 4) - 1;

    /** Static utility class - not to be instantiated. */
    private NumericEncoder()
    {
        super();
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
     * ffffffff.
     *
     * @param intToEncode
     *            the value to encode
     * @return the encoded string (8 hex digits)
     */
    public static String encode(int intToEncode)
    {
        int replacement = intToEncode ^ INTEGER_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
     * ffffffffffffffff.
     *
     * @param longToEncode
     *            the value to encode
     * @return the encoded string (16 hex digits)
     */
    public static String encode(long longToEncode)
    {
        long replacement = longToEncode ^ LONG_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Decode a long that was encoded with {@link #encode(long)}.
     *
     * @param hex
     *            the encoded string; only the first 16 characters are read
     * @return the decoded long
     * @throws NumberFormatException
     *             if one of the first 16 characters is not a hexadecimal digit
     * @throws StringIndexOutOfBoundsException
     *             if the string has fewer than 16 characters
     */
    public static long decodeLong(String hex)
    {
        return decodeFromHex(hex) ^ LONG_SIGN_MASK;
    }

    /**
     * Decode an int that was encoded with {@link #encode(int)}.
     *
     * @param hex
     *            the encoded string; only the first 8 characters are read
     * @return the decoded int
     * @throws NumberFormatException
     *             if one of the first 8 characters is not a hexadecimal digit
     * @throws StringIndexOutOfBoundsException
     *             if the string has fewer than 8 characters
     */
    public static int decodeInt(String hex)
    {
        return decodeIntFromHex(hex) ^ INTEGER_SIGN_MASK;
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison. Note that there is no negative NaN but there are codings that
     * imply this. So NaN and -Infinity may not compare as expected.
     *
     * @param floatToEncode
     *            the value to encode
     * @return the encoded string (8 hex digits)
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        int sign = bits & FLOAT_SIGN_MASK;
        int exponent = bits & FLOAT_EXPONENT_MASK;
        int mantissa = bits & FLOAT_MANTISSA_MASK;
        if (sign != 0)
        {
            // negative: invert magnitude so bigger negatives sort first
            exponent ^= FLOAT_EXPONENT_MASK;
            mantissa ^= FLOAT_MANTISSA_MASK;
        }
        sign ^= FLOAT_SIGN_MASK;
        int replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison. Note that there is no negative NaN but there are codings that
     * imply this. So NaN and -Infinity may not compare as expected.
     *
     * @param doubleToEncode
     *            the value to encode
     * @return the encoded string (16 hex digits)
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        long sign = bits & DOUBLE_SIGN_MASK;
        long exponent = bits & DOUBLE_EXPONENT_MASK;
        long mantissa = bits & DOUBLE_MANTISSA_MASK;
        if (sign != 0)
        {
            // negative: invert magnitude so bigger negatives sort first
            exponent ^= DOUBLE_EXPONENT_MASK;
            mantissa ^= DOUBLE_MANTISSA_MASK;
        }
        sign ^= DOUBLE_SIGN_MASK;
        long replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /** Format the 32 bits as 8 lower case hex digits, zero padded on the left. */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[] { '0', '0', '0', '0', '0', '0', '0', '0' };
        int charPos = INT_HEX_LENGTH;
        do
        {
            buf[--charPos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        while (i != 0);
        return new String(buf);
    }

    /** Format the 64 bits as 16 lower case hex digits, zero padded on the left. */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[] { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0' };
        int charPos = LONG_HEX_LENGTH;
        do
        {
            buf[--charPos] = DIGITS[(int) l & MASK];
            l >>>= 4;
        }
        while (l != 0);
        return new String(buf);
    }

    /** Parse the first 16 characters as an unsigned 64 bit hex number, last digit first. */
    private static long decodeFromHex(String hex)
    {
        long value = 0;
        int shift = 0;
        for (int i = LONG_HEX_LENGTH - 1; i >= 0; i--, shift += 4)
        {
            value |= ((long) hexDigit(hex, i)) << shift;
        }
        return value;
    }

    /** Parse the first 8 characters as an unsigned 32 bit hex number, last digit first. */
    private static int decodeIntFromHex(String hex)
    {
        int value = 0;
        int shift = 0;
        for (int i = INT_HEX_LENGTH - 1; i >= 0; i--, shift += 4)
        {
            value |= hexDigit(hex, i) << shift;
        }
        return value;
    }

    /**
     * Value (0-15) of the hex digit at the given index. Rejects anything that is
     * not a hex digit instead of letting Character.digit's -1 corrupt the result.
     */
    private static int hexDigit(String hex, int index)
    {
        char c = hex.charAt(index);
        int digit = Character.digit(c, 16);
        if (digit < 0)
        {
            throw new NumberFormatException("Invalid hexadecimal digit '" + c + "' at index " + index + " in \""
                    + hex + "\"");
        }
        return digit;
    }
}
|
@@ -1,215 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests for string encoding
|
||||
* @author andyh
|
||||
*
|
||||
*/
|
||||
public class NumericEncodingTest extends TestCase
|
||||
{
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
*/
|
||||
public NumericEncodingTest()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param arg0
|
||||
*/
|
||||
public NumericEncodingTest(String arg0)
|
||||
{
|
||||
super(arg0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do an exhaustive test for integers
|
||||
*
|
||||
*/
|
||||
public void xtestAllIntegerEncodings()
|
||||
{
|
||||
String lastString = null;
|
||||
String nextString = null;
|
||||
for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++)
|
||||
{
|
||||
nextString = NumericEncoder.encode((int) i);
|
||||
if (lastString != null)
|
||||
{
|
||||
assertFalse(lastString.compareTo(nextString) > 0);
|
||||
}
|
||||
lastString = nextString;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Do an exhaustive test for float
|
||||
*
|
||||
*/
|
||||
public void xtestAllFloatEncodings()
|
||||
{
|
||||
Float last = null;
|
||||
Float next = null;
|
||||
String lastString = null;
|
||||
String nextString = null;
|
||||
|
||||
for (int sign = 1; sign >= 0; sign--)
|
||||
{
|
||||
if (sign == 0)
|
||||
{
|
||||
for (int exponent = 0; exponent <= 0xFF; exponent++)
|
||||
{
|
||||
for (int mantissa = 0; mantissa <= 0x007FFFFF; mantissa++)
|
||||
{
|
||||
int bitPattern = sign << 31 | exponent << 23 | mantissa;
|
||||
next = Float.intBitsToFloat(bitPattern);
|
||||
|
||||
if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0))
|
||||
{
|
||||
System.err.println(last + " > " + next);
|
||||
}
|
||||
if (!next.equals(Float.NaN))
|
||||
{
|
||||
nextString = NumericEncoder.encode(next);
|
||||
if ((lastString != null) && (lastString.compareTo(nextString) > 0))
|
||||
{
|
||||
System.err.println(lastString + " > " + nextString);
|
||||
}
|
||||
lastString = nextString;
|
||||
}
|
||||
last = next;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int exponent = 0xFF; exponent >= 0; exponent--)
|
||||
{
|
||||
for (int mantissa = 0x007FFFFF; mantissa >= 0; mantissa--)
|
||||
{
|
||||
int bitPattern = sign << 31 | exponent << 23 | mantissa;
|
||||
next = Float.intBitsToFloat(bitPattern);
|
||||
if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0))
|
||||
{
|
||||
System.err.println(last + " > " + next);
|
||||
}
|
||||
if (!next.equals(Float.NaN))
|
||||
{
|
||||
nextString = NumericEncoder.encode(next);
|
||||
if ((lastString != null) && (lastString.compareTo(nextString) > 0))
|
||||
{
|
||||
System.err.println(lastString + " > " + nextString);
|
||||
}
|
||||
lastString = nextString;
|
||||
}
|
||||
last = next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sample test for int
|
||||
*/
|
||||
|
||||
public void testIntegerEncoding()
|
||||
{
|
||||
assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE));
|
||||
assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1));
|
||||
assertEquals("7fffffff", NumericEncoder.encode(-1));
|
||||
assertEquals("80000000", NumericEncoder.encode(0));
|
||||
assertEquals("80000001", NumericEncoder.encode(1));
|
||||
assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1));
|
||||
assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sample test for long
|
||||
*/
|
||||
|
||||
public void testLongEncoding()
|
||||
{
|
||||
assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE));
|
||||
assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1));
|
||||
assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L));
|
||||
assertEquals("8000000000000000", NumericEncoder.encode(0L));
|
||||
assertEquals("8000000000000001", NumericEncoder.encode(1L));
|
||||
assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1));
|
||||
assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE));
|
||||
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MIN_VALUE)), Long.MIN_VALUE);
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MIN_VALUE + 1)),Long.MIN_VALUE + 1);
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(-1L)), -1L);
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(0L)), 0L);
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(1L)), 1L);
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MAX_VALUE - 1)),Long.MAX_VALUE - 1);
|
||||
assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MAX_VALUE)), Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sample test for float
|
||||
*/
|
||||
|
||||
public void testFloatEncoding()
|
||||
{
|
||||
assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY));
|
||||
assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE));
|
||||
assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE));
|
||||
assertEquals("7fffffff", NumericEncoder.encode(-0f));
|
||||
assertEquals("80000000", NumericEncoder.encode(0f));
|
||||
assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE));
|
||||
assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE));
|
||||
assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY));
|
||||
assertEquals("ffc00000", NumericEncoder.encode(Float.NaN));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Sample test for double
|
||||
*/
|
||||
|
||||
public void testDoubleEncoding()
|
||||
{
|
||||
assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY));
|
||||
assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE));
|
||||
assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE));
|
||||
assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d));
|
||||
assertEquals("8000000000000000", NumericEncoder.encode(0d));
|
||||
assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE));
|
||||
assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE));
|
||||
assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY));
|
||||
assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN));
|
||||
|
||||
assertTrue( NumericEncoder.encode(-0.9).compareTo(NumericEncoder.encode(0.88)) < 0);
|
||||
assertTrue( NumericEncoder.encode(-0.9).compareTo(NumericEncoder.encode(0.91)) < 0);
|
||||
assertTrue( NumericEncoder.encode(0.88).compareTo(NumericEncoder.encode(0.91)) < 0);
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* Analyse repository paths
|
||||
*
|
||||
* @author andyh
|
||||
*/
|
||||
public class PathAnalyser extends Analyzer
|
||||
{
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
|
||||
PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
|
||||
PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
|
||||
}
|
||||
}
|
@@ -1,291 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.text.DecimalFormat;
|
||||
import java.text.NumberFormat;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
/**
|
||||
* @author andyh TODO To change the template for this generated type comment go to Window - Preferences - Java - Code
|
||||
* Style - Code Templates
|
||||
*/
|
||||
public class PathTokenFilter extends Tokenizer
|
||||
{
|
||||
public final static String INTEGER_FORMAT = "0000000000";
|
||||
|
||||
public final static char PATH_SEPARATOR = ';';
|
||||
|
||||
public final static char NAMESPACE_START_DELIMITER = '{';
|
||||
|
||||
public final static char NAMESPACE_END_DELIMITER = '}';
|
||||
|
||||
public final static String SEPARATOR_TOKEN_TEXT = ";";
|
||||
|
||||
public final static String NO_NS_TOKEN_TEXT = "<No Namespace>";
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_SEP = "PATH_SEPARATOR";
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_LENGTH = "PATH_LENGTH";
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_ELEMENT_NAME = "PATH_ELEMENT_NAME";
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE = "PATH_ELEMENT_NAMESPACE";
|
||||
|
||||
public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX = "PATH_ELEMENT_NAMESPACE_PREFIX";
|
||||
|
||||
char pathSeparator;
|
||||
|
||||
String separatorTokenText;
|
||||
|
||||
String noNsTokenText;
|
||||
|
||||
char nsStartDelimiter;
|
||||
|
||||
int nsStartDelimiterLength;
|
||||
|
||||
char nsEndDelimiter;
|
||||
|
||||
int nsEndDelimiterLength;
|
||||
|
||||
char nsPrefixDelimiter = ':';
|
||||
|
||||
LinkedList<Token> tokens = new LinkedList<Token>();
|
||||
|
||||
Iterator<Token> it = null;
|
||||
|
||||
private boolean includeNamespace;
|
||||
|
||||
public PathTokenFilter(Reader in, char pathSeparator, String separatorTokenText, String noNsTokenText,
|
||||
char nsStartDelimiter, char nsEndDelimiter, boolean includeNameSpace)
|
||||
{
|
||||
super(in);
|
||||
this.pathSeparator = pathSeparator;
|
||||
this.separatorTokenText = separatorTokenText;
|
||||
this.noNsTokenText = noNsTokenText;
|
||||
this.nsStartDelimiter = nsStartDelimiter;
|
||||
this.nsEndDelimiter = nsEndDelimiter;
|
||||
this.includeNamespace = includeNameSpace;
|
||||
|
||||
this.nsStartDelimiterLength = 1;
|
||||
this.nsEndDelimiterLength = 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.analysis.TokenStream#next()
|
||||
*/
|
||||
|
||||
public Token next() throws IOException
|
||||
{
|
||||
Token nextToken;
|
||||
if (it == null)
|
||||
{
|
||||
buildTokenListAndIterator();
|
||||
}
|
||||
if (it.hasNext())
|
||||
{
|
||||
nextToken = it.next();
|
||||
}
|
||||
else
|
||||
{
|
||||
nextToken = null;
|
||||
}
|
||||
return nextToken;
|
||||
}
|
||||
|
||||
private void buildTokenListAndIterator() throws IOException
|
||||
{
|
||||
NumberFormat nf = new DecimalFormat(INTEGER_FORMAT);
|
||||
|
||||
// Could optimise to read each path ata time - not just all paths
|
||||
int insertCountAt = 0;
|
||||
int lengthCounter = 0;
|
||||
Token t;
|
||||
Token pathSplitToken = null;
|
||||
Token nameToken = null;
|
||||
Token countToken = null;
|
||||
Token namespaceToken = null;
|
||||
while ((t = nextToken()) != null)
|
||||
{
|
||||
String text = t.termText();
|
||||
|
||||
if (text.length() == 0)
|
||||
{
|
||||
continue; // Skip if we find // or /; or ;; etc
|
||||
}
|
||||
|
||||
if (text.charAt(text.length() - 1) == pathSeparator)
|
||||
{
|
||||
text = text.substring(0, text.length() - 1);
|
||||
pathSplitToken = new Token(separatorTokenText, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_SEP);
|
||||
pathSplitToken.setPositionIncrement(1);
|
||||
|
||||
}
|
||||
|
||||
int split = -1;
|
||||
boolean isPrefix = false;
|
||||
|
||||
if ((text.length() > 0) && (text.charAt(0) == nsStartDelimiter))
|
||||
{
|
||||
split = text.indexOf(nsEndDelimiter);
|
||||
}
|
||||
|
||||
if (split == -1)
|
||||
{
|
||||
split = text.indexOf(nsPrefixDelimiter);
|
||||
isPrefix = true;
|
||||
}
|
||||
|
||||
if (split == -1)
|
||||
{
|
||||
namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(),
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
|
||||
nameToken = new Token(text, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isPrefix)
|
||||
{
|
||||
namespaceToken = new Token(text.substring(0, split), t.startOffset(), t.startOffset() + split,
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX);
|
||||
nameToken = new Token(text.substring(split + 1), t.startOffset() + split + 1, t.endOffset(),
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAME);
|
||||
}
|
||||
else
|
||||
{
|
||||
namespaceToken = new Token(text.substring(nsStartDelimiterLength,
|
||||
(split + nsEndDelimiterLength - 1)), t.startOffset(), t.startOffset() + split,
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
|
||||
nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset()
|
||||
+ split + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
namespaceToken.setPositionIncrement(1);
|
||||
nameToken.setPositionIncrement(1);
|
||||
|
||||
if (includeNamespace)
|
||||
{
|
||||
if (namespaceToken.termText().equals(""))
|
||||
{
|
||||
namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(),
|
||||
TOKEN_TYPE_PATH_ELEMENT_NAMESPACE);
|
||||
namespaceToken.setPositionIncrement(1);
|
||||
}
|
||||
|
||||
tokens.add(namespaceToken);
|
||||
|
||||
}
|
||||
tokens.add(nameToken);
|
||||
|
||||
lengthCounter++;
|
||||
|
||||
if (pathSplitToken != null)
|
||||
{
|
||||
|
||||
String countString = nf.format(lengthCounter);
|
||||
countToken = new Token(countString, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_SEP);
|
||||
countToken.setPositionIncrement(1);
|
||||
|
||||
tokens.add(insertCountAt, countToken);
|
||||
tokens.add(pathSplitToken);
|
||||
|
||||
lengthCounter = 0;
|
||||
insertCountAt = tokens.size();
|
||||
|
||||
pathSplitToken = null;
|
||||
}
|
||||
}
|
||||
|
||||
String countString = nf.format(lengthCounter);
|
||||
countToken = new Token(countString, 0, 0, TOKEN_TYPE_PATH_SEP);
|
||||
countToken.setPositionIncrement(1);
|
||||
|
||||
tokens.add(insertCountAt, countToken);
|
||||
|
||||
if ((tokens.size() == 0) || !(tokens.get(tokens.size() - 1).termText().equals(TOKEN_TYPE_PATH_SEP)))
|
||||
{
|
||||
pathSplitToken = new Token(separatorTokenText, 0, 0, TOKEN_TYPE_PATH_SEP);
|
||||
pathSplitToken.setPositionIncrement(1);
|
||||
tokens.add(pathSplitToken);
|
||||
}
|
||||
|
||||
it = tokens.iterator();
|
||||
}
|
||||
|
||||
int readerPosition = 0;
|
||||
|
||||
private Token nextToken() throws IOException
|
||||
{
|
||||
if (readerPosition == -1)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
StringBuilder buffer = new StringBuilder(64);
|
||||
boolean inNameSpace = false;
|
||||
int start = readerPosition;
|
||||
int current;
|
||||
char c;
|
||||
while ((current = input.read()) != -1)
|
||||
{
|
||||
c = (char) current;
|
||||
readerPosition++;
|
||||
if (c == nsStartDelimiter)
|
||||
{
|
||||
inNameSpace = true;
|
||||
}
|
||||
else if (c == nsEndDelimiter)
|
||||
{
|
||||
inNameSpace = false;
|
||||
}
|
||||
else if (!inNameSpace && (c == '/'))
|
||||
{
|
||||
return new Token(buffer.toString(), start, readerPosition - 1, "QNAME");
|
||||
}
|
||||
else if (!inNameSpace && (c == ';'))
|
||||
{
|
||||
buffer.append(c);
|
||||
return new Token(buffer.toString(), start, readerPosition, "LASTQNAME");
|
||||
}
|
||||
|
||||
buffer.append(c);
|
||||
}
|
||||
int end = readerPosition - 1;
|
||||
readerPosition = -1;
|
||||
if (!inNameSpace)
|
||||
{
|
||||
return new Token(buffer.toString(), start, end, "QNAME");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalStateException("QName terminated incorrectly: " + buffer.toString());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@@ -1,133 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class PathTokenFilterTest extends TestCase
|
||||
{
|
||||
|
||||
public PathTokenFilterTest()
|
||||
{
|
||||
super();
|
||||
}
|
||||
|
||||
public PathTokenFilterTest(String arg0)
|
||||
{
|
||||
super(arg0);
|
||||
}
|
||||
|
||||
|
||||
public void testFullPath() throws IOException
|
||||
{
|
||||
tokenise("{uri1}one", new String[]{"uri1", "one"});
|
||||
tokenise("/{uri1}one", new String[]{"uri1", "one"});
|
||||
tokenise("{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("/{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
tokenise("/{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
try
|
||||
{
|
||||
tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
}
|
||||
catch(IllegalStateException ise)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testPrefixPath() throws IOException
|
||||
{
|
||||
tokenise("uri1:one", new String[]{"uri1", "one"});
|
||||
tokenise("/uri1:one", new String[]{"uri1", "one"});
|
||||
tokenise("uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("/uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
tokenise("/uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
try
|
||||
{
|
||||
tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
}
|
||||
catch(IllegalStateException ise)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testMixedPath() throws IOException
|
||||
{
|
||||
|
||||
tokenise("{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("/{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
tokenise("uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
tokenise("/uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"});
|
||||
try
|
||||
{
|
||||
tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"});
|
||||
}
|
||||
catch(IllegalStateException ise)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void tokenise(String path, String[] tokens) throws IOException
|
||||
{
|
||||
StringReader reader = new StringReader(path);
|
||||
TokenStream ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
|
||||
PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
|
||||
PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
|
||||
Token t;
|
||||
int i = 0;
|
||||
while( (t = ts.next()) != null)
|
||||
{
|
||||
if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE))
|
||||
{
|
||||
assert(i % 2 == 0);
|
||||
assertEquals(t.termText(), tokens[i++]);
|
||||
}
|
||||
else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX))
|
||||
{
|
||||
assert(i % 2 == 0);
|
||||
assertEquals(t.termText(), tokens[i++]);
|
||||
}
|
||||
else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME))
|
||||
{
|
||||
assert(i % 2 == 1);
|
||||
assertEquals(t.termText(), tokens[i++]);
|
||||
}
|
||||
}
|
||||
if(i != tokens.length)
|
||||
{
|
||||
fail("Invalid number of tokens, found "+i+" and expected "+tokens.length);
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class PorterSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public PorterSnowballAnalyser()
|
||||
{
|
||||
super("Porter");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class PortugueseSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public PortugueseSnowballAnalyser()
|
||||
{
|
||||
super("Portuguese");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class RussianSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public RussianSnowballAnalyser()
|
||||
{
|
||||
super("Russian");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class SpanishSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public SpanishSnowballAnalyser()
|
||||
{
|
||||
super("Spanish");
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
|
||||
public class SwedishSnowballAnalyser extends SnowballAnalyzer
|
||||
{
|
||||
|
||||
public SwedishSnowballAnalyser()
|
||||
{
|
||||
super("Swedish");
|
||||
}
|
||||
}
|
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class VerbatimAnalyser extends Analyzer
|
||||
{
|
||||
boolean lowerCase;
|
||||
|
||||
public VerbatimAnalyser()
|
||||
{
|
||||
lowerCase = false;
|
||||
}
|
||||
|
||||
public VerbatimAnalyser(boolean lowerCase)
|
||||
{
|
||||
super();
|
||||
this.lowerCase = lowerCase;
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
return new VerbatimTokenFilter(reader, lowerCase);
|
||||
}
|
||||
}
|
@@ -1,163 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
public class VerbatimMLAnalayser extends Analyzer
|
||||
{
|
||||
private static Log s_logger = LogFactory.getLog(VerbatimMLAnalayser.class);
|
||||
|
||||
|
||||
private MLAnalysisMode mlAnalaysisMode;
|
||||
|
||||
public VerbatimMLAnalayser(MLAnalysisMode mlAnalaysisMode)
|
||||
{
|
||||
this.mlAnalaysisMode = mlAnalaysisMode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
// We use read ahead to get the language info - if this does not exist we need to restart
|
||||
// an use the default - there foer we need mark and restore.
|
||||
|
||||
if (!(reader instanceof BufferedReader))
|
||||
{
|
||||
BufferedReader breader = new BufferedReader(reader);
|
||||
try
|
||||
{
|
||||
if (!breader.markSupported())
|
||||
{
|
||||
throw new AnalysisException(
|
||||
"Multilingual tokenisation requires a reader that supports marks and reset");
|
||||
}
|
||||
breader.mark(100);
|
||||
StringBuilder builder = new StringBuilder();
|
||||
if (breader.read() == '\u0000')
|
||||
{
|
||||
String language = "";
|
||||
String country = "";
|
||||
String varient = "";
|
||||
char c;
|
||||
int count = 0;
|
||||
while ((c = (char) breader.read()) != '\u0000')
|
||||
{
|
||||
if (count++ > 99)
|
||||
{
|
||||
breader.reset();
|
||||
return getAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
if (c == '_')
|
||||
{
|
||||
if (language.length() == 0)
|
||||
{
|
||||
language = builder.toString();
|
||||
}
|
||||
else if (country.length() == 0)
|
||||
{
|
||||
country = builder.toString();
|
||||
}
|
||||
else if (varient.length() == 0)
|
||||
{
|
||||
varient = builder.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
breader.reset();
|
||||
return getAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
builder = new StringBuilder();
|
||||
}
|
||||
else
|
||||
{
|
||||
builder.append(c);
|
||||
}
|
||||
}
|
||||
if (builder.length() > 0)
|
||||
{
|
||||
if (language.length() == 0)
|
||||
{
|
||||
language = builder.toString();
|
||||
}
|
||||
else if (country.length() == 0)
|
||||
{
|
||||
country = builder.toString();
|
||||
}
|
||||
else if (varient.length() == 0)
|
||||
{
|
||||
varient = builder.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
breader.reset();
|
||||
return getAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
}
|
||||
Locale locale = new Locale(language, country, varient);
|
||||
// leave the reader where it is ....
|
||||
return new MLTokenDuplicator(getAnalyser().tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode);
|
||||
}
|
||||
else
|
||||
{
|
||||
breader.reset();
|
||||
return getAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
try
|
||||
{
|
||||
breader.reset();
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new AnalysisException("Failed to reset buffered reader - token stream will be invalid", e);
|
||||
}
|
||||
return getAnalyser().tokenStream(fieldName, breader);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new AnalysisException("Multilingual tokenisation requires a buffered reader");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyser()
|
||||
{
|
||||
return new VerbatimAnalyser(false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
@@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
public class VerbatimTokenFilter extends Tokenizer
|
||||
{
|
||||
boolean readInput = true;
|
||||
|
||||
boolean lowerCase;
|
||||
|
||||
VerbatimTokenFilter(Reader in, boolean lowerCase)
|
||||
{
|
||||
super(in);
|
||||
this.lowerCase = lowerCase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token next() throws IOException
|
||||
{
|
||||
if (readInput)
|
||||
{
|
||||
readInput = false;
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
int current;
|
||||
char c;
|
||||
while ((current = input.read()) != -1)
|
||||
{
|
||||
c = (char) current;
|
||||
buffer.append(c);
|
||||
}
|
||||
|
||||
String token = buffer.toString();
|
||||
if(lowerCase)
|
||||
{
|
||||
token = token.toLowerCase();
|
||||
}
|
||||
return new Token(token, 0, token.length() - 1, "VERBATIM");
|
||||
}
|
||||
else
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user