From 8105f39e33cadcb324444f69cfa66a3f2cc835e8 Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Wed, 7 Jul 2010 11:02:47 +0000 Subject: [PATCH] Move lucene analysis into the DataModel project git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20975 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../repo/search/impl/lucene/AnalysisMode.java | 38 -- .../search/impl/lucene/LuceneAnalyser.java | 328 ------------------ .../analysis/AlfrescoStandardAnalyser.java | 67 ---- .../analysis/AlfrescoStandardFilter.java | 140 -------- .../lucene/analysis/AnalysisException.java | 55 --- .../lucene/analysis/CategoryAnalyser.java | 46 --- .../analysis/DanishSnowballAnalyser.java | 30 -- .../impl/lucene/analysis/DateAnalyser.java | 39 --- .../lucene/analysis/DateTimeAnalyser.java | 39 --- .../lucene/analysis/DateTimeTokenFilter.java | 191 ---------- .../impl/lucene/analysis/DateTokenFilter.java | 85 ----- .../impl/lucene/analysis/DoubleAnalyser.java | 44 --- .../lucene/analysis/DoubleTokenFilter.java | 69 ---- .../analysis/DutchSnowballAnalyser.java | 30 -- .../analysis/EnglishSnowballAnalyser.java | 30 -- .../analysis/FinnishSnowballAnalyser.java | 30 -- .../impl/lucene/analysis/FloatAnalyser.java | 43 --- .../lucene/analysis/FloatTokenFilter.java | 69 ---- .../analysis/FrenchSnowballAnalyser.java | 30 -- ...nchSnowballAnalyserThatRemovesAccents.java | 43 --- .../analysis/German2SnowballAnalyser.java | 30 -- .../analysis/GermanSnowballAnalyser.java | 30 -- .../impl/lucene/analysis/IntegerAnalyser.java | 43 --- .../lucene/analysis/IntegerTokenFilter.java | 69 ---- .../analysis/ItalianSnowballAnalyser.java | 30 -- .../lucene/analysis/KPSnowballAnalyser.java | 30 -- .../impl/lucene/analysis/LongAnalyser.java | 44 --- .../impl/lucene/analysis/LongTokenFilter.java | 69 ---- .../analysis/LovinsSnowballAnalyser.java | 30 -- .../analysis/LowerCaseVerbatimAnalyser.java | 27 -- .../impl/lucene/analysis/MLAnalayser.java | 213 ------------ 
.../lucene/analysis/MLTokenDuplicator.java | 148 -------- .../analysis/NorwegianSnowballAnalyser.java | 30 -- .../impl/lucene/analysis/NumericEncoder.java | 228 ------------ .../lucene/analysis/NumericEncodingTest.java | 215 ------------ .../impl/lucene/analysis/PathAnalyser.java | 39 --- .../impl/lucene/analysis/PathTokenFilter.java | 291 ---------------- .../lucene/analysis/PathTokenFilterTest.java | 133 ------- .../analysis/PorterSnowballAnalyser.java | 30 -- .../analysis/PortugueseSnowballAnalyser.java | 30 -- .../analysis/RussianSnowballAnalyser.java | 30 -- .../analysis/SpanishSnowballAnalyser.java | 30 -- .../analysis/SwedishSnowballAnalyser.java | 30 -- .../lucene/analysis/VerbatimAnalyser.java | 45 --- .../lucene/analysis/VerbatimMLAnalayser.java | 163 --------- .../lucene/analysis/VerbatimTokenFilter.java | 67 ---- 46 files changed, 3570 deletions(-) delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/AnalysisMode.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/AnalysisException.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/CategoryAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DanishSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DateAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeTokenFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTokenFilter.java delete mode 100644 
source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleTokenFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/DutchSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/EnglishSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/FinnishSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyserThatRemovesAccents.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/German2SnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/GermanSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/ItalianSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/KPSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/LongAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/LovinsSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java delete mode 100644 
source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/NorwegianSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/NumericEncoder.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/NumericEncodingTest.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/PathAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/PorterSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/PortugueseSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/RussianSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/SpanishSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/SwedishSnowballAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimMLAnalayser.java delete mode 100644 source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java diff --git a/source/java/org/alfresco/repo/search/impl/lucene/AnalysisMode.java b/source/java/org/alfresco/repo/search/impl/lucene/AnalysisMode.java deleted file mode 100644 index 375448df66..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/AnalysisMode.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. 
- * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene; - -public enum AnalysisMode -{ - DEFAULT - , - TOKENISE - , - IDENTIFIER - , - FUZZY - , - PREFIX - , - WILD - , - LIKE - ; - -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java deleted file mode 100644 index 05c332879e..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene; - -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; - -import org.alfresco.model.ContentModel; -import org.alfresco.repo.dictionary.IndexTokenisationMode; -import org.alfresco.repo.search.MLAnalysisMode; -import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser; -import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser; -import org.alfresco.repo.search.impl.lucene.analysis.MLAnalayser; -import org.alfresco.repo.search.impl.lucene.analysis.PathAnalyser; -import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser; -import org.alfresco.repo.search.impl.lucene.analysis.VerbatimMLAnalayser; -import org.alfresco.service.cmr.dictionary.DataTypeDefinition; -import org.alfresco.service.cmr.dictionary.DictionaryService; -import org.alfresco.service.cmr.dictionary.PropertyDefinition; -import org.alfresco.service.namespace.QName; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceAnalyzer; - -/** - * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser - * should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false - * when adding the field to the document) - * - * @author andyh - */ - -public class LuceneAnalyser extends Analyzer -{ - private static Log s_logger = LogFactory.getLog(LuceneAnalyser.class); - - // Dictinary service to look up analyser classes by data type and locale. - private DictionaryService dictionaryService; - - // If all else fails a fall back analyser - private Analyzer defaultAnalyser; - - // Cached analysers for non ML data types. 
- private Map analysers = new HashMap(); - - private MLAnalysisMode mlAlaysisMode; - - /** - * Constructs with a default standard analyser - * - * @param defaultAnalyzer - * Any fields not specifically defined to use a different analyzer will use the one provided here. - */ - public LuceneAnalyser(DictionaryService dictionaryService, MLAnalysisMode mlAlaysisMode) - { - this(new AlfrescoStandardAnalyser()); - this.dictionaryService = dictionaryService; - this.mlAlaysisMode = mlAlaysisMode; - } - - /** - * Constructs with default analyzer. - * - * @param defaultAnalyzer - * Any fields not specifically defined to use a different analyzer will use the one provided here. - */ - public LuceneAnalyser(Analyzer defaultAnalyser) - { - this.defaultAnalyser = defaultAnalyser; - } - - public TokenStream tokenStream(String fieldName, Reader reader, AnalysisMode analysisMode) - { - Analyzer analyser = (Analyzer) analysers.get(fieldName); - if (analyser == null) - { - analyser = findAnalyser(fieldName, analysisMode); - } - return analyser.tokenStream(fieldName, reader); - } - - public TokenStream tokenStream(String fieldName, Reader reader) - { - return tokenStream(fieldName, reader, AnalysisMode.DEFAULT); - } - - /** - * Pick the analyser from the field name - * - * @param fieldName - * @return - */ - private Analyzer findAnalyser(String fieldName, AnalysisMode analysisMode) - { - Analyzer analyser; - if (fieldName.equals("PATH")) - { - analyser = new PathAnalyser(); - } - else if (fieldName.equals("QNAME")) - { - analyser = new PathAnalyser(); - } - else if (fieldName.equals("PRIMARYASSOCTYPEQNAME")) - { - analyser = new PathAnalyser(); - } - else if (fieldName.equals("ASSOCTYPEQNAME")) - { - analyser = new PathAnalyser(); - } - else if (fieldName.equals("TYPE")) - { - throw new UnsupportedOperationException("TYPE must not be tokenised"); - } - else if (fieldName.equals("ASPECT")) - { - throw new UnsupportedOperationException("ASPECT must not be tokenised"); - } - else if 
(fieldName.equals("ANCESTOR")) - { - analyser = new WhitespaceAnalyzer(); - } - else if (fieldName.startsWith("@")) - { - if (fieldName.endsWith(".mimetype")) - { - analyser = new VerbatimAnalyser(); - } - else if (fieldName.endsWith(".size")) - { - analyser = new LongAnalyser(); - } - else if (fieldName.endsWith(".locale")) - { - analyser = new VerbatimAnalyser(true); - } - else - { - QName propertyQName = QName.createQName(fieldName.substring(1)); - // Temporary fix for person and user uids - - if (propertyQName.equals(ContentModel.PROP_USER_USERNAME) - || propertyQName.equals(ContentModel.PROP_USERNAME) || propertyQName.equals(ContentModel.PROP_AUTHORITY_NAME)) - { - analyser = new VerbatimAnalyser(true); - } - else - { - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - IndexTokenisationMode tokenise = IndexTokenisationMode.TRUE; - if (propertyDef != null) - { - DataTypeDefinition dataType = propertyDef.getDataType(); - tokenise = propertyDef.getIndexTokenisationMode(); - if (tokenise == null) - { - tokenise = IndexTokenisationMode.TRUE; - } - switch (tokenise) - { - case TRUE: - if (dataType.getName().equals(DataTypeDefinition.CONTENT)) - { - analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); - } - else if (dataType.getName().equals(DataTypeDefinition.TEXT)) - { - analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); - } - else if (dataType.getName().equals(DataTypeDefinition.MLTEXT)) - { - analyser = new MLAnalayser(dictionaryService, mlAlaysisMode); - } - else - { - analyser = loadAnalyzer(dataType); - } - break; - case BOTH: - switch (analysisMode) - { - case DEFAULT: - case TOKENISE: - if (dataType.getName().equals(DataTypeDefinition.CONTENT)) - { - analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); - } - else if (dataType.getName().equals(DataTypeDefinition.TEXT)) - { - analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY); - } - else if 
(dataType.getName().equals(DataTypeDefinition.MLTEXT)) - { - analyser = new MLAnalayser(dictionaryService, mlAlaysisMode); - } - else - { - analyser = loadAnalyzer(dataType); - } - break; - case IDENTIFIER: - if (dataType.getName().equals(DataTypeDefinition.MLTEXT)) - { - analyser = new VerbatimMLAnalayser(mlAlaysisMode); - } - else - { - analyser = new VerbatimAnalyser(); - } - break; - default: - throw new UnsupportedOperationException("TYPE must not be tokenised"); - } - - break; - case FALSE: - // TODO: MLText verbatim analyser - analyser = new VerbatimAnalyser(); - break; - default: - throw new UnsupportedOperationException("TYPE must not be tokenised"); - } - } - else - { - switch (analysisMode) - { - case IDENTIFIER: - analyser = new VerbatimAnalyser(); - break; - case DEFAULT: - case TOKENISE: - DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT); - analyser = loadAnalyzer(dataType); - break; - default: - throw new UnsupportedOperationException(); - } - - } - } - } - } - else - { - analyser = defaultAnalyser; - } - analysers.put(fieldName, analyser); - return analyser; - } - - - /** - * Find an instantiate an analyser. The shuld all be thread sade as Analyser.tokenStream should be re-entrant. 
- * - * @param dataType - * @return - */ - private Analyzer loadAnalyzer(DataTypeDefinition dataType) - { - String analyserClassName = dataType.getAnalyserClassName().trim(); - try - { - Class clazz = Class.forName(analyserClassName); - Analyzer analyser = (Analyzer) clazz.newInstance(); - if (s_logger.isDebugEnabled()) - { - s_logger.debug("Loaded " + analyserClassName + " for type " + dataType.getName()); - } - return analyser; - } - catch (ClassNotFoundException e) - { - throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using " + analyserClassName); - } - catch (InstantiationException e) - { - throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using " + analyserClassName); - } - catch (IllegalAccessException e) - { - throw new RuntimeException("Unable to load analyser for property of type " + dataType.getName() + " using " + analyserClassName); - } - } - - /** - * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different - * languages etc. 
- */ - @Override - public int getPositionIncrementGap(String fieldName) - { - if (fieldName.startsWith("@") && !fieldName.endsWith(".mimetype")) - { - QName propertyQName = QName.createQName(fieldName.substring(1)); - PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); - if (propertyDef != null) - { - if (propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)) - { - return 1000; - } - } - } - return super.getPositionIncrementGap(fieldName); - } - -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java deleted file mode 100644 index 63e0690ac3..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardAnalyser.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; -import java.util.Set; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.ISOLatin1AccentFilter; -import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; - - -public class AlfrescoStandardAnalyser extends Analyzer -{ - private Set stopSet; - - /** - * An array containing some common English words that are usually not useful for searching. - */ - public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS; - - /** Builds an analyzer. */ - public AlfrescoStandardAnalyser() - { - this(STOP_WORDS); - } - - /** Builds an analyzer with the given stop words. */ - public AlfrescoStandardAnalyser(String[] stopWords) - { - stopSet = StopFilter.makeStopSet(stopWords); - } - - /** - * Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. - */ - public TokenStream tokenStream(String fieldName, Reader reader) - { - TokenStream result = new StandardTokenizer(reader); - result = new AlfrescoStandardFilter(result); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopSet); - result = new ISOLatin1AccentFilter(result); - return result; - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java deleted file mode 100644 index fef9e7bd3f..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. 
- * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.util.LinkedList; -import java.util.Queue; -import java.util.StringTokenizer; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.standard.StandardTokenizer; - -public class AlfrescoStandardFilter extends TokenFilter -{ - - /** Construct filtering in. */ - public AlfrescoStandardFilter(TokenStream in) - { - super(in); - } - - private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE]; - - private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM]; - - private static final String HOST_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]; - - private static final String ALPHANUM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM]; - - private Queue hostTokens = null; - - /** - * Returns the next token in the stream, or null at EOS. - *

- * Removes 's from the end of words. - *

- * Removes dots from acronyms. - *

- * Splits host names ... - */ - public final org.apache.lucene.analysis.Token next() throws java.io.IOException - { - if (hostTokens == null) - { - org.apache.lucene.analysis.Token t = input.next(); - - if (t == null) - return null; - - String text = t.termText(); - String type = t.type(); - - if (type == APOSTROPHE_TYPE && // remove 's - (text.endsWith("'s") || text.endsWith("'S"))) - { - return new org.apache.lucene.analysis.Token(text.substring(0, text.length() - 2), t.startOffset(), t - .endOffset(), type); - - } - else if (type == ACRONYM_TYPE) - { // remove dots - StringBuffer trimmed = new StringBuffer(); - for (int i = 0; i < text.length(); i++) - { - char c = text.charAt(i); - if (c != '.') - trimmed.append(c); - } - return new org.apache.lucene.analysis.Token(trimmed.toString(), t.startOffset(), t.endOffset(), type); - - } - else if (type == HOST_TYPE) - { - // ("." )+ > - // There must be at least two tokens .... - hostTokens = new LinkedList(); - StringTokenizer tokeniser = new StringTokenizer(text, "."); - int start = t.startOffset(); - int end; - while (tokeniser.hasMoreTokens()) - { - String token = tokeniser.nextToken(); - end = start + token.length(); - hostTokens.offer(new org.apache.lucene.analysis.Token(token, start, end, ALPHANUM_TYPE)); - start = end + 1; - } - // check if we have an acronym ..... yes a.b.c ends up here ... 
- - if (text.length() == hostTokens.size() * 2 - 1) - { - hostTokens = null; - // acronym - StringBuffer trimmed = new StringBuffer(); - for (int i = 0; i < text.length(); i++) - { - char c = text.charAt(i); - if (c != '.') - trimmed.append(c); - } - return new org.apache.lucene.analysis.Token(trimmed.toString(), t.startOffset(), t.endOffset(), - ALPHANUM_TYPE); - } - else - { - return hostTokens.remove(); - } - } - else - { - return t; - } - } - else - { - org.apache.lucene.analysis.Token token = hostTokens.remove(); - if (hostTokens.isEmpty()) - { - hostTokens = null; - } - return token; - } - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AnalysisException.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AnalysisException.java deleted file mode 100644 index f1eac4414a..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AnalysisException.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.alfresco.error.AlfrescoRuntimeException; - -public class AnalysisException extends AlfrescoRuntimeException -{ - - /** - * - */ - private static final long serialVersionUID = -7722380192490118459L; - - public AnalysisException(String msgId) - { - super(msgId); - // TODO Auto-generated constructor stub - } - - public AnalysisException(String msgId, Object[] msgParams) - { - super(msgId, msgParams); - // TODO Auto-generated constructor stub - } - - public AnalysisException(String msgId, Throwable cause) - { - super(msgId, cause); - // TODO Auto-generated constructor stub - } - - public AnalysisException(String msgId, Object[] msgParams, Throwable cause) - { - super(msgId, msgParams, cause); - // TODO Auto-generated constructor stub - } - -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/CategoryAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/CategoryAnalyser.java deleted file mode 100644 index ffd198c074..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/CategoryAnalyser.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -/** - * @author andyh - * - * TODO To change the template for this generated type comment go to Window - - * Preferences - Java - Code Style - Code Templates - */ -public class CategoryAnalyser extends Analyzer -{ - /* - * (non-Javadoc) - * - * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, - * java.io.Reader) - */ - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR, - PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT, - PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, false); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DanishSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DanishSnowballAnalyser.java deleted file mode 100644 index b7a273292f..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DanishSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class DanishSnowballAnalyser extends SnowballAnalyzer -{ - - public DanishSnowballAnalyser() - { - super("Danish"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateAnalyser.java deleted file mode 100644 index c6869a1797..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateAnalyser.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -public class DateAnalyser extends Analyzer -{ - - public DateAnalyser() - { - super(); - } - - // Split at the T in the XML date form - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new DateTokenFilter(reader); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeAnalyser.java deleted file mode 100644 index 2cb7d31cc0..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeAnalyser.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -public class DateTimeAnalyser extends Analyzer -{ - - public DateTimeAnalyser() - { - super(); - } - - // Split at the T in the XML date form - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new DateTimeTokenFilter(reader); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeTokenFilter.java deleted file mode 100644 index eea62d712f..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTimeTokenFilter.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; -import java.text.ParseException; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.Iterator; - -import org.alfresco.util.CachingDateFormat; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; - -/** - * @author andyh - */ -public class DateTimeTokenFilter extends Tokenizer -{ - Tokenizer baseTokeniser; - - Iterator tokenIterator = null; - - public DateTimeTokenFilter(Reader in) - { - super(in); - baseTokeniser = new WhitespaceTokenizer(in); - } - - public Token next() throws IOException - { - if (tokenIterator == null) - { - buildIterator(); - } - if (tokenIterator.hasNext()) - { - return tokenIterator.next(); - } - else - { - return null; - } - } - - public void buildIterator() throws IOException - { - Token candidate; - ArrayList tokens = new ArrayList(); - while ((candidate = baseTokeniser.next()) != null) - { - Date date; - if (candidate.termText().equalsIgnoreCase("now")) - { - date = new Date(); - } - else if (candidate.termText().equalsIgnoreCase("today")) - { - date = new Date(); - Calendar cal = Calendar.getInstance(); - cal.setTime(date); - cal.set(Calendar.HOUR_OF_DAY, cal.getMinimum(Calendar.HOUR_OF_DAY)); - cal.set(Calendar.MINUTE, cal.getMinimum(Calendar.MINUTE)); - cal.set(Calendar.SECOND, cal.getMinimum(Calendar.SECOND)); - cal.set(Calendar.MILLISECOND, cal.getMinimum(Calendar.MILLISECOND)); - - } - else - { - try - { - date = CachingDateFormat.lenientParse(candidate.termText()); - } - catch (ParseException e) - { - continue; - } - } - - Calendar cal = Calendar.getInstance(); - cal.setTime(date); - - Token token; - - // four digits - token = new Token("YE" + cal.get(Calendar.YEAR), candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - - // 2 digits - int 
month = cal.get(Calendar.MONTH); - if (month < 10) - { - token = new Token("MO0" + month, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - else - { - token = new Token("MO" + month, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - - int day = cal.get(Calendar.DAY_OF_MONTH); - if (day < 10) - { - token = new Token("DA0" + day, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - else - { - token = new Token("DA" + day, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - - int hour = cal.get(Calendar.HOUR_OF_DAY); - if (hour < 10) - { - token = new Token("HO0" + hour, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - else - { - token = new Token("HO" + hour, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - - int minute = cal.get(Calendar.MINUTE); - if (minute < 10) - { - token = new Token("MI0" + minute, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - else - { - token = new Token("MI" + minute, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - - int second = cal.get(Calendar.SECOND); - if (second < 10) - { - token = new Token("SE0" + second, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - else - { - token = new Token("SE" + second, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - - int millis = cal.get(Calendar.MILLISECOND); - if (millis < 10) - { - token = new Token("MS00" + millis, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - else if (millis < 100) - { - token = new Token("MS0" + millis, candidate.startOffset(), candidate.startOffset(), candidate.type()); - 
tokens.add(token); - } - else - { - token = new Token("MS" + millis, candidate.startOffset(), candidate.startOffset(), candidate.type()); - tokens.add(token); - } - - break; - } - - tokenIterator = tokens.iterator(); - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTokenFilter.java deleted file mode 100644 index 13caa6ae13..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DateTokenFilter.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; - -import org.alfresco.util.CachingDateFormat; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; - -/** - * @author andyh - */ -public class DateTokenFilter extends Tokenizer -{ - Tokenizer baseTokeniser; - - public DateTokenFilter(Reader in) - { - super(in); - baseTokeniser = new WhitespaceTokenizer(in); - } - - public Token next() throws IOException - { - SimpleDateFormat dof = CachingDateFormat.getDateOnlyFormat(); - Token candidate; - while ((candidate = baseTokeniser.next()) != null) - { - Date date; - if (candidate.termText().equalsIgnoreCase("now")) - { - date = new Date(); - } - else if (candidate.termText().equalsIgnoreCase("today")) - { - date = new Date(); - Calendar cal = Calendar.getInstance(); - cal.setTime(date); - cal.set(Calendar.HOUR_OF_DAY, cal.getMinimum(Calendar.HOUR_OF_DAY)); - cal.set(Calendar.MINUTE, cal.getMinimum(Calendar.MINUTE)); - cal.set(Calendar.SECOND, cal.getMinimum(Calendar.SECOND)); - cal.set(Calendar.MILLISECOND, cal.getMinimum(Calendar.MILLISECOND)); - - } - else - { - try - { - date = CachingDateFormat.lenientParse(candidate.termText()); - } - catch (ParseException e) - { - continue; - } - } - String valueString = dof.format(date); - Token integerToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(), candidate.type()); - return integerToken; - } - return null; - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleAnalyser.java deleted file mode 100644 index 0c2ec82bb2..0000000000 --- 
a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleAnalyser.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -/** - * Simple analyser to wrap the tokenisation of doubles. - * - * @author Andy Hind - */ -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -public class DoubleAnalyser extends Analyzer -{ - - public DoubleAnalyser() - { - super(); - } - - - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new DoubleTokenFilter(reader); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleTokenFilter.java deleted file mode 100644 index 0fa7246004..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DoubleTokenFilter.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. 
- * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; - -/** - * Simple tokeniser for doubles. - * - * @author Andy Hind - */ -public class DoubleTokenFilter extends Tokenizer -{ - Tokenizer baseTokeniser; - - public DoubleTokenFilter(Reader in) - { - super(in); - baseTokeniser = new WhitespaceTokenizer(in); - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.analysis.TokenStream#next() - */ - - public Token next() throws IOException - { - Token candidate; - while((candidate = baseTokeniser.next()) != null) - { - try - { - Double d = Double.valueOf(candidate.termText()); - String valueString = NumericEncoder.encode(d.doubleValue()); - Token doubleToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(), - candidate.type()); - return doubleToken; - } - catch (NumberFormatException e) - { - // just ignore and try the next one - } - } - return null; - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DutchSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/DutchSnowballAnalyser.java 
deleted file mode 100644 index 4d1fab302d..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/DutchSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class DutchSnowballAnalyser extends SnowballAnalyzer -{ - - public DutchSnowballAnalyser() - { - super("Dutch"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/EnglishSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/EnglishSnowballAnalyser.java deleted file mode 100644 index 949e6dff78..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/EnglishSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class EnglishSnowballAnalyser extends SnowballAnalyzer -{ - - public EnglishSnowballAnalyser() - { - super("English"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FinnishSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FinnishSnowballAnalyser.java deleted file mode 100644 index 9be2e80267..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FinnishSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class FinnishSnowballAnalyser extends SnowballAnalyzer -{ - - public FinnishSnowballAnalyser() - { - super("Finnish"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatAnalyser.java deleted file mode 100644 index 407d602d0c..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatAnalyser.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -/** - * Simple analyser for floats. 
- * - * @author Andy Hind - */ -public class FloatAnalyser extends Analyzer -{ - - public FloatAnalyser() - { - super(); - } - - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new FloatTokenFilter(reader); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java deleted file mode 100644 index f9de4f5624..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FloatTokenFilter.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; - -/** - * Simple tokeniser for floats. 
- * - * @author Andy Hind - */ -public class FloatTokenFilter extends Tokenizer -{ - Tokenizer baseTokeniser; - - public FloatTokenFilter(Reader in) - { - super(in); - baseTokeniser = new WhitespaceTokenizer(in); - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.analysis.TokenStream#next() - */ - - public Token next() throws IOException - { - Token candidate; - while((candidate = baseTokeniser.next()) != null) - { - try - { - Float floatValue = Float.valueOf(candidate.termText()); - String valueString = NumericEncoder.encode(floatValue.floatValue()); - Token floatToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(), - candidate.type()); - return floatToken; - } - catch (NumberFormatException e) - { - // just ignore and try the next one - } - } - return null; - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyser.java deleted file mode 100644 index 8a1502ae68..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class FrenchSnowballAnalyser extends SnowballAnalyzer -{ - - public FrenchSnowballAnalyser() - { - super("French"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyserThatRemovesAccents.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyserThatRemovesAccents.java deleted file mode 100644 index e829b10f2b..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/FrenchSnowballAnalyserThatRemovesAccents.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.ISOLatin1AccentFilter; -import org.apache.lucene.analysis.TokenStream; - -public class FrenchSnowballAnalyserThatRemovesAccents extends Analyzer -{ - Analyzer analyzer = new FrenchSnowballAnalyser(); - - public FrenchSnowballAnalyserThatRemovesAccents() - { - - } - - public TokenStream tokenStream(String fieldName, Reader reader) - { - TokenStream result = analyzer.tokenStream(fieldName, reader); - result = new ISOLatin1AccentFilter(result); - return result; - } - -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/German2SnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/German2SnowballAnalyser.java deleted file mode 100644 index 3f5b0c97b0..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/German2SnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class German2SnowballAnalyser extends SnowballAnalyzer -{ - - public German2SnowballAnalyser() - { - super("German2"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/GermanSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/GermanSnowballAnalyser.java deleted file mode 100644 index c7e2889256..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/GermanSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class GermanSnowballAnalyser extends SnowballAnalyzer -{ - - public GermanSnowballAnalyser() - { - super("German"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerAnalyser.java deleted file mode 100644 index 2da5174099..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerAnalyser.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -/** - * Simple analyser for integers. 
- * - * @author Andy Hind - */ -public class IntegerAnalyser extends Analyzer -{ - - public IntegerAnalyser() - { - super(); - } - - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new IntegerTokenFilter(reader); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java deleted file mode 100644 index 7cf27b9770..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/IntegerTokenFilter.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; - -/** - * Simple tokeniser for integers. 
- * - * @author Andy Hind - */ -public class IntegerTokenFilter extends Tokenizer -{ - Tokenizer baseTokeniser; - - public IntegerTokenFilter(Reader in) - { - super(in); - baseTokeniser = new WhitespaceTokenizer(in); - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.analysis.TokenStream#next() - */ - - public Token next() throws IOException - { - Token candidate; - while((candidate = baseTokeniser.next()) != null) - { - try - { - Integer integer = Integer.valueOf(candidate.termText()); - String valueString = NumericEncoder.encode(integer.intValue()); - Token integerToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(), - candidate.type()); - return integerToken; - } - catch (NumberFormatException e) - { - // just ignore and try the next one - } - } - return null; - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/ItalianSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/ItalianSnowballAnalyser.java deleted file mode 100644 index 67d921f997..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/ItalianSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class ItalianSnowballAnalyser extends SnowballAnalyzer -{ - - public ItalianSnowballAnalyser() - { - super("Italian"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/KPSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/KPSnowballAnalyser.java deleted file mode 100644 index ee2326556a..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/KPSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class KPSnowballAnalyser extends SnowballAnalyzer -{ - - public KPSnowballAnalyser() - { - super("Kp"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongAnalyser.java deleted file mode 100644 index 923be93cd0..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongAnalyser.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. 
- * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -/** - * Simple analyser for longs. - * - * @author Andy Hind - */ -public class LongAnalyser extends Analyzer -{ - - public LongAnalyser() - { - super(); - } - - - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new LongTokenFilter(reader); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java deleted file mode 100644 index fe0905840e..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LongTokenFilter.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; - -/** - * Simple tokeniser for longs. - * - * @author Andy Hind - */ -public class LongTokenFilter extends Tokenizer -{ - Tokenizer baseTokeniser; - - public LongTokenFilter(Reader in) - { - super(in); - baseTokeniser = new WhitespaceTokenizer(in); - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.analysis.TokenStream#next() - */ - - public Token next() throws IOException - { - Token candidate; - while((candidate = baseTokeniser.next()) != null) - { - try - { - Long longValue = Long.valueOf(candidate.termText()); - String valueString = NumericEncoder.encode(longValue.longValue()); - Token longToken = new Token(valueString, candidate.startOffset(), candidate.startOffset(), - candidate.type()); - return longToken; - } - catch (NumberFormatException e) - { - // just ignore and try the next one - } - } - return null; - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LovinsSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LovinsSnowballAnalyser.java deleted file mode 100644 index cdd0c0edf0..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LovinsSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. 
- * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class LovinsSnowballAnalyser extends SnowballAnalyzer -{ - - public LovinsSnowballAnalyser() - { - super("Lovins"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java deleted file mode 100644 index 89cbef6351..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/LowerCaseVerbatimAnalyser.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -public class LowerCaseVerbatimAnalyser extends VerbatimAnalyser -{ - public LowerCaseVerbatimAnalyser() - { - super(true); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java deleted file mode 100644 index 4c3c91d796..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLAnalayser.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.Locale; - -import org.springframework.extensions.surf.util.I18NUtil; -import org.alfresco.repo.search.MLAnalysisMode; -import org.alfresco.service.cmr.dictionary.DataTypeDefinition; -import org.alfresco.service.cmr.dictionary.DictionaryService; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -public class MLAnalayser extends Analyzer -{ - private static Log s_logger = LogFactory.getLog(MLAnalayser.class); - - private DictionaryService dictionaryService; - - private HashMap analysers = new HashMap(); - - private MLAnalysisMode mlAnalaysisMode; - - public MLAnalayser(DictionaryService dictionaryService, MLAnalysisMode mlAnalaysisMode) - { - this.dictionaryService = dictionaryService; - this.mlAnalaysisMode = mlAnalaysisMode; - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) - { - // We use read ahead to get the language info - if this does not exist we need to restart - // an use the default - there foer we need mark and restore. 
- - if (!(reader instanceof BufferedReader)) - { - BufferedReader breader = new BufferedReader(reader); - try - { - if (!breader.markSupported()) - { - throw new AnalysisException( - "Multilingual tokenisation requires a reader that supports marks and reset"); - } - breader.mark(100); - StringBuilder builder = new StringBuilder(); - if (breader.read() == '\u0000') - { - String language = ""; - String country = ""; - String varient = ""; - char c; - int count = 0; - while ((c = (char) breader.read()) != '\u0000') - { - if (count++ > 99) - { - breader.reset(); - return getDefaultAnalyser().tokenStream(fieldName, breader); - } - if (c == '_') - { - if (language.length() == 0) - { - language = builder.toString(); - } - else if (country.length() == 0) - { - country = builder.toString(); - } - else if (varient.length() == 0) - { - varient = builder.toString(); - } - else - { - breader.reset(); - return getDefaultAnalyser().tokenStream(fieldName, breader); - } - builder = new StringBuilder(); - } - else - { - builder.append(c); - } - } - if (builder.length() > 0) - { - if (language.length() == 0) - { - language = builder.toString(); - } - else if (country.length() == 0) - { - country = builder.toString(); - } - else if (varient.length() == 0) - { - varient = builder.toString(); - } - else - { - breader.reset(); - return getDefaultAnalyser().tokenStream(fieldName, breader); - } - } - Locale locale = new Locale(language, country, varient); - // leave the reader where it is .... 
- return new MLTokenDuplicator(getAnalyser(locale).tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode); - } - else - { - breader.reset(); - return getDefaultAnalyser().tokenStream(fieldName, breader); - } - } - catch (IOException io) - { - try - { - breader.reset(); - } - catch (IOException e) - { - throw new AnalysisException("Failed to reset buffered reader - token stream will be invalid", e); - } - return getDefaultAnalyser().tokenStream(fieldName, breader); - } - - } - else - { - throw new AnalysisException("Multilingual tokenisation requires a buffered reader"); - } - } - - private Analyzer getDefaultAnalyser() - { - return getAnalyser(I18NUtil.getLocale()); - } - - private Analyzer getAnalyser(Locale locale) - { - Analyzer analyser = (Analyzer) analysers.get(locale); - if (analyser == null) - { - analyser = findAnalyser(locale); - } - // wrap analyser to produce plain and prefixed tokens - return analyser; - } - - private Analyzer findAnalyser(Locale locale) - { - Analyzer analyser = loadAnalyzer(locale); - analysers.put(locale, analyser); - return analyser; - } - - private Analyzer loadAnalyzer(Locale locale) - { - DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT); - String analyserClassName = dataType.getAnalyserClassName(locale); - if (s_logger.isDebugEnabled()) - { - s_logger.debug("Loading " + analyserClassName + " for " + locale); - } - try - { - Class clazz = Class.forName(analyserClassName); - Analyzer analyser = (Analyzer) clazz.newInstance(); - return analyser; - } - catch (ClassNotFoundException e) - { - throw new RuntimeException("Unable to load analyser for property of type " - + dataType.getName() + " using " + analyserClassName); - } - catch (InstantiationException e) - { - throw new RuntimeException("Unable to load analyser for property of type " - + dataType.getName() + " using " + analyserClassName); - } - catch (IllegalAccessException e) - { - throw new RuntimeException("Unable to load 
analyser for property of type " - + dataType.getName() + " using " + analyserClassName); - } - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java deleted file mode 100644 index def10a0bc0..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Locale; - -import org.alfresco.repo.search.MLAnalysisMode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; - -/** - * Create duplicate tokens for multilingual varients The forms are Tokens: Token - all languages {fr}Token - if a - * language is specified {fr_CA}Token - if a language and country is specified {fr_CA_Varient}Token - for all three - * {fr__Varient}Token - for a language varient with no country - * - * @author andyh - */ -public class MLTokenDuplicator extends Tokenizer -{ - private static Log s_logger = LogFactory.getLog(MLTokenDuplicator.class); - - TokenStream source; - - Locale locale; - - Iterator it; - - HashSet prefixes; - - public MLTokenDuplicator(TokenStream source, Locale locale, Reader reader, MLAnalysisMode mlAnalaysisMode) - { - super(reader); - this.source = source; - this.locale = locale; - - Collection locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false); - prefixes = new HashSet(locales.size()); - for(Locale toAdd : locales) - { - String localeString = toAdd.toString(); - if(localeString.length() == 0) - { - prefixes.add(""); - } - else - { - StringBuilder builder = new StringBuilder(16); - builder.append("{").append(localeString).append("}"); - prefixes.add(builder.toString()); - } - } - if(s_logger.isDebugEnabled()) - { - s_logger.debug("Locale "+ locale +" using "+mlAnalaysisMode+" is "+prefixes); - } - - } - - public MLTokenDuplicator(Locale locale, MLAnalysisMode mlAnalaysisMode) - { - this(null, locale, null, mlAnalaysisMode); - } - - @Override - public Token next() throws IOException - { - Token t = 
null; - if (it == null) - { - it = buildIterator(); - } - if (it == null) - { - return null; - } - if (it.hasNext()) - { - t = it.next(); - return t; - } - else - { - it = null; - t = this.next(); - return t; - } - } - - private Iterator buildIterator() throws IOException - { - Token token = source.next(); - return buildIterator(token); - - } - - - public Iterator buildIterator(Token token) - { - if (token == null) - { - return null; - } - - ArrayList tokens = new ArrayList(prefixes.size()); - for (String prefix : prefixes) - { - Token newToken = new Token(prefix + token.termText(), token.startOffset(), token.endOffset(), token.type()); - if (tokens.size() == 0) - { - newToken.setPositionIncrement(token.getPositionIncrement()); - } - else - { - newToken.setPositionIncrement(0); - } - tokens.add(newToken); - } - return tokens.iterator(); - - } - - -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/NorwegianSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/NorwegianSnowballAnalyser.java deleted file mode 100644 index 0121208b39..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/NorwegianSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class NorwegianSnowballAnalyser extends SnowballAnalyzer -{ - - public NorwegianSnowballAnalyser() - { - super("Norwegian"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/NumericEncoder.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/NumericEncoder.java deleted file mode 100644 index eac3db2a5a..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/NumericEncoder.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -/** - * Support to encode numeric types in the lucene index. - * - * To support range queries in the lucene index numeric types need to be indexed - * specially. This has been addressed for int and long types for lucene and - * limited support (via scaling) for float and double. - * - * The implementation splits an int, long, float or double into the sign bit, - * optional exponent and mantissa either from the int or long format or its IEEE - * 754 byte representation. 
/**
 * Support to encode numeric types in the lucene index.
 * <p>
 * To support range queries, numeric values are encoded as fixed-width,
 * lower-case hex strings whose string order matches the numeric order: 8
 * characters for int and float, 16 for long and double.
 * <p>
 * For int and long the sign bit is flipped, so negative numbers (sign 0 after
 * the flip) sort before positive numbers (sign 1). For float and double the
 * IEEE 754 bits are split into sign, exponent and mantissa; when the number is
 * negative the exponent and mantissa are XORed against their masks, which
 * reverses their order so that small negative numbers come after big negative
 * numbers, and the sign bit is then flipped as for the integral types.
 *
 * @author Andy Hind
 */
public class NumericEncoder
{
    /*
     * Constants for integer encoding
     */

    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */

    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding
     */

    static final int FLOAT_SIGN_MASK = 0x80000000;

    static final int FLOAT_EXPONENT_MASK = 0x7F800000;

    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding
     */

    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /*
     * Constants for the hex representation
     */

    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e',
            'f' };

    /** Mask selecting the four bits of one hex digit. */
    private static final int MASK = (1 << 4) - 1;

    /** Number of hex digits in an encoded int or float. */
    private static final int INT_HEX_LENGTH = 8;

    /** Number of hex digits in an encoded long or double. */
    private static final int LONG_HEX_LENGTH = 16;

    private NumericEncoder()
    {
        super();
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
     * ffffffff.
     *
     * @param intToEncode
     *            the value to encode
     * @return the encoded string (8 hex digits)
     */
    public static String encode(int intToEncode)
    {
        int replacement = intToEncode ^ INTEGER_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
     * ffffffffffffffff.
     *
     * @param longToEncode
     *            the value to encode
     * @return the encoded string (16 hex digits)
     */
    public static String encode(long longToEncode)
    {
        long replacement = longToEncode ^ LONG_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Decode a long produced by {@link #encode(long)}.
     *
     * @param hex
     *            the encoded value; the first 16 characters are read and any
     *            further characters are ignored
     * @return the decoded long
     * @throws NumberFormatException
     *             if <code>hex</code> has fewer than 16 characters or one of the
     *             first 16 is not a hex digit
     */
    public static long decodeLong(String hex)
    {
        return decodeFromHex(hex) ^ LONG_SIGN_MASK;
    }

    /**
     * Decode an int produced by {@link #encode(int)}.
     *
     * @param hex
     *            the encoded value; the first 8 characters are read and any
     *            further characters are ignored
     * @return the decoded int
     * @throws NumberFormatException
     *             if <code>hex</code> has fewer than 8 characters or one of the
     *             first 8 is not a hex digit
     */
    public static int decodeInt(String hex)
    {
        return decodeIntFromHex(hex) ^ INTEGER_SIGN_MASK;
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison. Note that there is no negative NaN but there are codings that
     * imply this. So NaN and -Infinity may not compare as expected.
     *
     * @param floatToEncode
     *            the value to encode
     * @return the encoded string (8 hex digits)
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        int sign = bits & FLOAT_SIGN_MASK;
        int exponent = bits & FLOAT_EXPONENT_MASK;
        int mantissa = bits & FLOAT_MANTISSA_MASK;
        if (sign != 0)
        {
            // Negative: invert magnitude so bigger magnitudes sort first.
            exponent ^= FLOAT_EXPONENT_MASK;
            mantissa ^= FLOAT_MANTISSA_MASK;
        }
        sign ^= FLOAT_SIGN_MASK;
        int replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison. Note that there is no negative NaN but there are codings that
     * imply this. So NaN and -Infinity may not compare as expected.
     *
     * @param doubleToEncode
     *            the value to encode
     * @return the encoded string (16 hex digits)
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        long sign = bits & DOUBLE_SIGN_MASK;
        long exponent = bits & DOUBLE_EXPONENT_MASK;
        long mantissa = bits & DOUBLE_MANTISSA_MASK;
        if (sign != 0)
        {
            // Negative: invert magnitude so bigger magnitudes sort first.
            exponent ^= DOUBLE_EXPONENT_MASK;
            mantissa ^= DOUBLE_MANTISSA_MASK;
        }
        sign ^= DOUBLE_SIGN_MASK;
        long replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /** Format the 32 bits of <code>i</code> as 8 zero-padded hex digits. */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[INT_HEX_LENGTH];
        for (int charPos = INT_HEX_LENGTH - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /** Format the 64 bits of <code>l</code> as 16 zero-padded hex digits. */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[LONG_HEX_LENGTH];
        for (int charPos = LONG_HEX_LENGTH - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }

    /** Parse the first 16 characters of <code>hex</code> as an unsigned 64 bit pattern. */
    private static long decodeFromHex(String hex)
    {
        requireLength(hex, LONG_HEX_LENGTH);
        long l = 0;
        for (int i = 0; i < LONG_HEX_LENGTH; i++)
        {
            l = (l << 4) | hexDigit(hex, i);
        }
        return l;
    }

    /** Parse the first 8 characters of <code>hex</code> as an unsigned 32 bit pattern. */
    private static int decodeIntFromHex(String hex)
    {
        requireLength(hex, INT_HEX_LENGTH);
        int l = 0;
        for (int i = 0; i < INT_HEX_LENGTH; i++)
        {
            l = (l << 4) | hexDigit(hex, i);
        }
        return l;
    }

    /** Reject input that is too short to hold an encoded value. */
    private static void requireLength(String hex, int length)
    {
        if (hex.length() < length)
        {
            throw new NumberFormatException("Expected at least " + length + " hex digits but found \"" + hex + "\"");
        }
    }

    /** The value of the hex digit at <code>index</code>; rejects anything that is not a hex digit. */
    private static int hexDigit(String hex, int index)
    {
        int digit = Character.digit(hex.charAt(index), 16);
        if (digit < 0)
        {
            throw new NumberFormatException("Invalid hex digit at index " + index + " in \"" + hex + "\"");
        }
        return digit;
    }
}
a/source/java/org/alfresco/repo/search/impl/lucene/analysis/NumericEncodingTest.java +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import junit.framework.TestCase; - -/** - * Tests for string encoding - * @author andyh - * - */ -public class NumericEncodingTest extends TestCase -{ - - /** - * - * - */ - public NumericEncodingTest() - { - super(); - } - - /** - * - * @param arg0 - */ - public NumericEncodingTest(String arg0) - { - super(arg0); - } - - /** - * Do an exhaustive test for integers - * - */ - public void xtestAllIntegerEncodings() - { - String lastString = null; - String nextString = null; - for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++) - { - nextString = NumericEncoder.encode((int) i); - if (lastString != null) - { - assertFalse(lastString.compareTo(nextString) > 0); - } - lastString = nextString; - } - } - - /** - * Do an exhaustive test for float - * - */ - public void xtestAllFloatEncodings() - { - Float last = null; - Float next = null; - String lastString = null; - String nextString = null; - - for (int sign = 1; sign >= 0; sign--) - { - if (sign == 0) - { - for (int exponent = 0; exponent <= 0xFF; exponent++) - { - for (int mantissa = 0; mantissa <= 0x007FFFFF; 
mantissa++) - { - int bitPattern = sign << 31 | exponent << 23 | mantissa; - next = Float.intBitsToFloat(bitPattern); - - if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0)) - { - System.err.println(last + " > " + next); - } - if (!next.equals(Float.NaN)) - { - nextString = NumericEncoder.encode(next); - if ((lastString != null) && (lastString.compareTo(nextString) > 0)) - { - System.err.println(lastString + " > " + nextString); - } - lastString = nextString; - } - last = next; - - } - } - } - else - { - for (int exponent = 0xFF; exponent >= 0; exponent--) - { - for (int mantissa = 0x007FFFFF; mantissa >= 0; mantissa--) - { - int bitPattern = sign << 31 | exponent << 23 | mantissa; - next = Float.intBitsToFloat(bitPattern); - if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0)) - { - System.err.println(last + " > " + next); - } - if (!next.equals(Float.NaN)) - { - nextString = NumericEncoder.encode(next); - if ((lastString != null) && (lastString.compareTo(nextString) > 0)) - { - System.err.println(lastString + " > " + nextString); - } - lastString = nextString; - } - last = next; - } - } - } - } - } - - /** - * Sample test for int - */ - - public void testIntegerEncoding() - { - assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE)); - assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1)); - assertEquals("7fffffff", NumericEncoder.encode(-1)); - assertEquals("80000000", NumericEncoder.encode(0)); - assertEquals("80000001", NumericEncoder.encode(1)); - assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1)); - assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE)); - } - - /** - * Sample test for long - */ - - public void testLongEncoding() - { - assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE)); - assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1)); - assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L)); - 
assertEquals("8000000000000000", NumericEncoder.encode(0L)); - assertEquals("8000000000000001", NumericEncoder.encode(1L)); - assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1)); - assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE)); - - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MIN_VALUE)), Long.MIN_VALUE); - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MIN_VALUE + 1)),Long.MIN_VALUE + 1); - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(-1L)), -1L); - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(0L)), 0L); - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(1L)), 1L); - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MAX_VALUE - 1)),Long.MAX_VALUE - 1); - assertEquals(NumericEncoder.decodeLong(NumericEncoder.encode(Long.MAX_VALUE)), Long.MAX_VALUE); - } - - /** - * Sample test for float - */ - - public void testFloatEncoding() - { - assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY)); - assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE)); - assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE)); - assertEquals("7fffffff", NumericEncoder.encode(-0f)); - assertEquals("80000000", NumericEncoder.encode(0f)); - assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE)); - assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE)); - assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY)); - assertEquals("ffc00000", NumericEncoder.encode(Float.NaN)); - - } - - /** - * Sample test for double - */ - - public void testDoubleEncoding() - { - assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY)); - assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE)); - assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE)); - assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d)); - 
assertEquals("8000000000000000", NumericEncoder.encode(0d)); - assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE)); - assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE)); - assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY)); - assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN)); - - assertTrue( NumericEncoder.encode(-0.9).compareTo(NumericEncoder.encode(0.88)) < 0); - assertTrue( NumericEncoder.encode(-0.9).compareTo(NumericEncoder.encode(0.91)) < 0); - assertTrue( NumericEncoder.encode(0.88).compareTo(NumericEncoder.encode(0.91)) < 0); - - - - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathAnalyser.java deleted file mode 100644 index d8d9b099ee..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathAnalyser.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -/** - * Analyse repository paths - * - * @author andyh - */ -public class PathAnalyser extends Analyzer -{ - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR, - PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT, - PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true); - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java deleted file mode 100644 index 8f27077106..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilter.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; -import java.text.DecimalFormat; -import java.text.NumberFormat; -import java.util.Iterator; -import java.util.LinkedList; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; - -/** - * @author andyh TODO To change the template for this generated type comment go to Window - Preferences - Java - Code - * Style - Code Templates - */ -public class PathTokenFilter extends Tokenizer -{ - public final static String INTEGER_FORMAT = "0000000000"; - - public final static char PATH_SEPARATOR = ';'; - - public final static char NAMESPACE_START_DELIMITER = '{'; - - public final static char NAMESPACE_END_DELIMITER = '}'; - - public final static String SEPARATOR_TOKEN_TEXT = ";"; - - public final static String NO_NS_TOKEN_TEXT = ""; - - public final static String TOKEN_TYPE_PATH_SEP = "PATH_SEPARATOR"; - - public final static String TOKEN_TYPE_PATH_LENGTH = "PATH_LENGTH"; - - public final static String TOKEN_TYPE_PATH_ELEMENT_NAME = "PATH_ELEMENT_NAME"; - - public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE = "PATH_ELEMENT_NAMESPACE"; - - public final static String TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX = "PATH_ELEMENT_NAMESPACE_PREFIX"; - - char pathSeparator; - - String separatorTokenText; - - String noNsTokenText; - - char nsStartDelimiter; - - int nsStartDelimiterLength; - - char nsEndDelimiter; - - int nsEndDelimiterLength; - - char nsPrefixDelimiter = ':'; - - LinkedList tokens = new LinkedList(); - - Iterator it = null; - - private boolean includeNamespace; - - public PathTokenFilter(Reader in, char pathSeparator, String separatorTokenText, String noNsTokenText, - char nsStartDelimiter, char nsEndDelimiter, boolean includeNameSpace) - { - super(in); - this.pathSeparator = pathSeparator; - this.separatorTokenText = separatorTokenText; - this.noNsTokenText = noNsTokenText; - this.nsStartDelimiter = 
nsStartDelimiter; - this.nsEndDelimiter = nsEndDelimiter; - this.includeNamespace = includeNameSpace; - - this.nsStartDelimiterLength = 1; - this.nsEndDelimiterLength = 1; - - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.analysis.TokenStream#next() - */ - - public Token next() throws IOException - { - Token nextToken; - if (it == null) - { - buildTokenListAndIterator(); - } - if (it.hasNext()) - { - nextToken = it.next(); - } - else - { - nextToken = null; - } - return nextToken; - } - - private void buildTokenListAndIterator() throws IOException - { - NumberFormat nf = new DecimalFormat(INTEGER_FORMAT); - - // Could optimise to read each path ata time - not just all paths - int insertCountAt = 0; - int lengthCounter = 0; - Token t; - Token pathSplitToken = null; - Token nameToken = null; - Token countToken = null; - Token namespaceToken = null; - while ((t = nextToken()) != null) - { - String text = t.termText(); - - if (text.length() == 0) - { - continue; // Skip if we find // or /; or ;; etc - } - - if (text.charAt(text.length() - 1) == pathSeparator) - { - text = text.substring(0, text.length() - 1); - pathSplitToken = new Token(separatorTokenText, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_SEP); - pathSplitToken.setPositionIncrement(1); - - } - - int split = -1; - boolean isPrefix = false; - - if ((text.length() > 0) && (text.charAt(0) == nsStartDelimiter)) - { - split = text.indexOf(nsEndDelimiter); - } - - if (split == -1) - { - split = text.indexOf(nsPrefixDelimiter); - isPrefix = true; - } - - if (split == -1) - { - namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(), - TOKEN_TYPE_PATH_ELEMENT_NAMESPACE); - nameToken = new Token(text, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME); - - } - else - { - if (isPrefix) - { - namespaceToken = new Token(text.substring(0, split), t.startOffset(), t.startOffset() + split, - TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX); - nameToken = new Token(text.substring(split 
+ 1), t.startOffset() + split + 1, t.endOffset(), - TOKEN_TYPE_PATH_ELEMENT_NAME); - } - else - { - namespaceToken = new Token(text.substring(nsStartDelimiterLength, - (split + nsEndDelimiterLength - 1)), t.startOffset(), t.startOffset() + split, - TOKEN_TYPE_PATH_ELEMENT_NAMESPACE); - nameToken = new Token(text.substring(split + nsEndDelimiterLength), t.startOffset() - + split + nsEndDelimiterLength, t.endOffset(), TOKEN_TYPE_PATH_ELEMENT_NAME); - } - } - - namespaceToken.setPositionIncrement(1); - nameToken.setPositionIncrement(1); - - if (includeNamespace) - { - if (namespaceToken.termText().equals("")) - { - namespaceToken = new Token(noNsTokenText, t.startOffset(), t.startOffset(), - TOKEN_TYPE_PATH_ELEMENT_NAMESPACE); - namespaceToken.setPositionIncrement(1); - } - - tokens.add(namespaceToken); - - } - tokens.add(nameToken); - - lengthCounter++; - - if (pathSplitToken != null) - { - - String countString = nf.format(lengthCounter); - countToken = new Token(countString, t.startOffset(), t.endOffset(), TOKEN_TYPE_PATH_SEP); - countToken.setPositionIncrement(1); - - tokens.add(insertCountAt, countToken); - tokens.add(pathSplitToken); - - lengthCounter = 0; - insertCountAt = tokens.size(); - - pathSplitToken = null; - } - } - - String countString = nf.format(lengthCounter); - countToken = new Token(countString, 0, 0, TOKEN_TYPE_PATH_SEP); - countToken.setPositionIncrement(1); - - tokens.add(insertCountAt, countToken); - - if ((tokens.size() == 0) || !(tokens.get(tokens.size() - 1).termText().equals(TOKEN_TYPE_PATH_SEP))) - { - pathSplitToken = new Token(separatorTokenText, 0, 0, TOKEN_TYPE_PATH_SEP); - pathSplitToken.setPositionIncrement(1); - tokens.add(pathSplitToken); - } - - it = tokens.iterator(); - } - - int readerPosition = 0; - - private Token nextToken() throws IOException - { - if (readerPosition == -1) - { - return null; - } - StringBuilder buffer = new StringBuilder(64); - boolean inNameSpace = false; - int start = readerPosition; - int current; - char 
c; - while ((current = input.read()) != -1) - { - c = (char) current; - readerPosition++; - if (c == nsStartDelimiter) - { - inNameSpace = true; - } - else if (c == nsEndDelimiter) - { - inNameSpace = false; - } - else if (!inNameSpace && (c == '/')) - { - return new Token(buffer.toString(), start, readerPosition - 1, "QNAME"); - } - else if (!inNameSpace && (c == ';')) - { - buffer.append(c); - return new Token(buffer.toString(), start, readerPosition, "LASTQNAME"); - } - - buffer.append(c); - } - int end = readerPosition - 1; - readerPosition = -1; - if (!inNameSpace) - { - return new Token(buffer.toString(), start, end, "QNAME"); - } - else - { - throw new IllegalStateException("QName terminated incorrectly: " + buffer.toString()); - } - - } -} \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java deleted file mode 100644 index 11e5532809..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PathTokenFilterTest.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.StringReader; - -import junit.framework.TestCase; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; - -public class PathTokenFilterTest extends TestCase -{ - - public PathTokenFilterTest() - { - super(); - } - - public PathTokenFilterTest(String arg0) - { - super(arg0); - } - - - public void testFullPath() throws IOException - { - tokenise("{uri1}one", new String[]{"uri1", "one"}); - tokenise("/{uri1}one", new String[]{"uri1", "one"}); - tokenise("{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"}); - tokenise("/{uri1}one/{uri2}two/", new String[]{"uri1", "one", "uri2", "two"}); - tokenise("{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"}); - tokenise("/{uri1}one/{uri2}two/{uri3}three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"}); - try - { - tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"}); - } - catch(IllegalStateException ise) - { - - } - - } - - - public void testPrefixPath() throws IOException - { - tokenise("uri1:one", new String[]{"uri1", "one"}); - tokenise("/uri1:one", new String[]{"uri1", "one"}); - tokenise("uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"}); - tokenise("/uri1:one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"}); - tokenise("uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"}); - tokenise("/uri1:one/uri2:two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"}); - try - { - tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"}); - } - catch(IllegalStateException ise) - { - - } - - } - - - public void testMixedPath() throws IOException - { - - tokenise("{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", "two"}); - tokenise("/{uri1}one/uri2:two/", new String[]{"uri1", "one", "uri2", 
"two"}); - tokenise("uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"}); - tokenise("/uri1:one/{uri2}two/uri3:three", new String[]{"uri1", "one", "uri2", "two", "uri3", "three"}); - try - { - tokenise("{uri1}one;{uri2}two/", new String[]{"uri1", "one", "uri2", "two"}); - } - catch(IllegalStateException ise) - { - - } - - } - - - private void tokenise(String path, String[] tokens) throws IOException - { - StringReader reader = new StringReader(path); - TokenStream ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR, - PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT, - PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true); - Token t; - int i = 0; - while( (t = ts.next()) != null) - { - if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE)) - { - assert(i % 2 == 0); - assertEquals(t.termText(), tokens[i++]); - } - else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX)) - { - assert(i % 2 == 0); - assertEquals(t.termText(), tokens[i++]); - } - else if(t.type().equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME)) - { - assert(i % 2 == 1); - assertEquals(t.termText(), tokens[i++]); - } - } - if(i != tokens.length) - { - fail("Invalid number of tokens, found "+i+" and expected "+tokens.length); - } - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PorterSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PorterSnowballAnalyser.java deleted file mode 100644 index fc914ab96e..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PorterSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. 
- * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class PorterSnowballAnalyser extends SnowballAnalyzer -{ - - public PorterSnowballAnalyser() - { - super("Porter"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PortugueseSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/PortugueseSnowballAnalyser.java deleted file mode 100644 index ae138758d5..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/PortugueseSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class PortugueseSnowballAnalyser extends SnowballAnalyzer -{ - - public PortugueseSnowballAnalyser() - { - super("Portuguese"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/RussianSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/RussianSnowballAnalyser.java deleted file mode 100644 index 20426e900a..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/RussianSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class RussianSnowballAnalyser extends SnowballAnalyzer -{ - - public RussianSnowballAnalyser() - { - super("Russian"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/SpanishSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/SpanishSnowballAnalyser.java deleted file mode 100644 index 07c9f4f260..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/SpanishSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class SpanishSnowballAnalyser extends SnowballAnalyzer -{ - - public SpanishSnowballAnalyser() - { - super("Spanish"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/SwedishSnowballAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/SwedishSnowballAnalyser.java deleted file mode 100644 index c6090d30ed..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/SwedishSnowballAnalyser.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import org.apache.lucene.analysis.snowball.SnowballAnalyzer; - -public class SwedishSnowballAnalyser extends SnowballAnalyzer -{ - - public SwedishSnowballAnalyser() - { - super("Swedish"); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java deleted file mode 100644 index d1d8b44236..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimAnalyser.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -public class VerbatimAnalyser extends Analyzer -{ - boolean lowerCase; - - public VerbatimAnalyser() - { - lowerCase = false; - } - - public VerbatimAnalyser(boolean lowerCase) - { - super(); - this.lowerCase = lowerCase; - } - - public TokenStream tokenStream(String fieldName, Reader reader) - { - return new VerbatimTokenFilter(reader, lowerCase); - } -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimMLAnalayser.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimMLAnalayser.java deleted file mode 100644 index 804633fb40..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimMLAnalayser.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.util.Locale; - -import org.alfresco.repo.search.MLAnalysisMode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -public class VerbatimMLAnalayser extends Analyzer -{ - private static Log s_logger = LogFactory.getLog(VerbatimMLAnalayser.class); - - - private MLAnalysisMode mlAnalaysisMode; - - public VerbatimMLAnalayser(MLAnalysisMode mlAnalaysisMode) - { - this.mlAnalaysisMode = mlAnalaysisMode; - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) - { - // We use read ahead to get the language info - if this does not exist we need to restart - // an use the default - there foer we need mark and restore. - - if (!(reader instanceof BufferedReader)) - { - BufferedReader breader = new BufferedReader(reader); - try - { - if (!breader.markSupported()) - { - throw new AnalysisException( - "Multilingual tokenisation requires a reader that supports marks and reset"); - } - breader.mark(100); - StringBuilder builder = new StringBuilder(); - if (breader.read() == '\u0000') - { - String language = ""; - String country = ""; - String varient = ""; - char c; - int count = 0; - while ((c = (char) breader.read()) != '\u0000') - { - if (count++ > 99) - { - breader.reset(); - return getAnalyser().tokenStream(fieldName, breader); - } - if (c == '_') - { - if (language.length() == 0) - { - language = builder.toString(); - } - else if (country.length() == 0) - { - country = builder.toString(); - } - else if (varient.length() == 0) - { - varient = builder.toString(); - } - else - { - breader.reset(); - return getAnalyser().tokenStream(fieldName, breader); - } - builder = new StringBuilder(); - } - else - { - builder.append(c); - } - } - if (builder.length() > 0) - { 
- if (language.length() == 0) - { - language = builder.toString(); - } - else if (country.length() == 0) - { - country = builder.toString(); - } - else if (varient.length() == 0) - { - varient = builder.toString(); - } - else - { - breader.reset(); - return getAnalyser().tokenStream(fieldName, breader); - } - } - Locale locale = new Locale(language, country, varient); - // leave the reader where it is .... - return new MLTokenDuplicator(getAnalyser().tokenStream(fieldName, breader), locale, breader, mlAnalaysisMode); - } - else - { - breader.reset(); - return getAnalyser().tokenStream(fieldName, breader); - } - } - catch (IOException io) - { - try - { - breader.reset(); - } - catch (IOException e) - { - throw new AnalysisException("Failed to reset buffered reader - token stream will be invalid", e); - } - return getAnalyser().tokenStream(fieldName, breader); - } - - } - else - { - throw new AnalysisException("Multilingual tokenisation requires a buffered reader"); - } - } - - /** - * @return - */ - private Analyzer getAnalyser() - { - return new VerbatimAnalyser(false); - } - - - - -} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java deleted file mode 100644 index 93f442ebb1..0000000000 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/VerbatimTokenFilter.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2005-2010 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - */ -package org.alfresco.repo.search.impl.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.Tokenizer; - -public class VerbatimTokenFilter extends Tokenizer -{ - boolean readInput = true; - - boolean lowerCase; - - VerbatimTokenFilter(Reader in, boolean lowerCase) - { - super(in); - this.lowerCase = lowerCase; - } - - @Override - public Token next() throws IOException - { - if (readInput) - { - readInput = false; - StringBuilder buffer = new StringBuilder(); - int current; - char c; - while ((current = input.read()) != -1) - { - c = (char) current; - buffer.append(c); - } - - String token = buffer.toString(); - if(lowerCase) - { - token = token.toLowerCase(); - } - return new Token(token, 0, token.length() - 1, "VERBATIM"); - } - else - { - return null; - } - } - -}