From 6bda204c4df4c0730f7c74f179826dc426eba6e4 Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Mon, 15 Dec 2008 16:08:30 +0000 Subject: [PATCH] Add sorting support for text and mltext git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@12395 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- config/alfresco/model/contentModel.xml | 15 ++ .../alfresco/repo/search/MLAnalysisMode.java | 3 +- .../impl/lucene/ADMLuceneIndexerImpl.java | 207 +++++++++++++++--- .../impl/lucene/ADMLuceneSearcherImpl.java | 133 ++++++++--- .../search/impl/lucene/ADMLuceneTest.java | 171 ++++++++++++--- .../search/impl/lucene/LuceneTest_model.xml | 20 ++ 6 files changed, 462 insertions(+), 87 deletions(-) diff --git a/config/alfresco/model/contentModel.xml b/config/alfresco/model/contentModel.xml index ed673bc70c..099bbc3f9c 100644 --- a/config/alfresco/model/contentModel.xml +++ b/config/alfresco/model/contentModel.xml @@ -32,6 +32,11 @@ Name d:text true + + true + false + both + @@ -373,10 +378,20 @@ Title d:mltext + + true + false + both + Description d:mltext + + true + false + both + diff --git a/source/java/org/alfresco/repo/search/MLAnalysisMode.java b/source/java/org/alfresco/repo/search/MLAnalysisMode.java index e703276727..4b28107fe5 100644 --- a/source/java/org/alfresco/repo/search/MLAnalysisMode.java +++ b/source/java/org/alfresco/repo/search/MLAnalysisMode.java @@ -27,6 +27,7 @@ package org.alfresco.repo.search; import java.util.Collection; import java.util.HashSet; import java.util.Locale; +import java.util.Set; import org.alfresco.error.AlfrescoRuntimeException; @@ -720,7 +721,7 @@ public enum MLAnalysisMode public abstract boolean includesExactCountryMatch(); - public static Collection getLocales(MLAnalysisMode mlAnalaysisMode, Locale locale, boolean withWildcards) + public static Set getLocales(MLAnalysisMode mlAnalaysisMode, Locale locale, boolean withWildcards) { HashSet locales = new HashSet(); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java index 88c97c87e7..50a276a874 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java @@ -50,7 +50,10 @@ import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.transform.ContentTransformer; import org.alfresco.repo.dictionary.IndexTokenisationMode; import org.alfresco.repo.search.IndexerException; +import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.impl.lucene.analysis.DateTimeAnalyser; +import org.alfresco.repo.search.impl.lucene.analysis.MLTokenDuplicator; +import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser; import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; import org.alfresco.repo.tenant.TenantService; @@ -89,6 +92,7 @@ import org.apache.lucene.index.TermDocs; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause.Occur; @@ -393,7 +397,7 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp indexer.initialise(storeRef, deltaId); return indexer; } - + public static ADMLuceneNoActionIndexerImpl getNoActionIndexer(StoreRef storeRef, String deltaId, LuceneConfig config) throws LuceneIndexException { if (s_logger.isDebugEnabled()) @@ -814,6 +818,8 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp if (isContent) { + // Content is always tokenised + ContentData contentData = DefaultTypeConverter.INSTANCE.convert(ContentData.class, serializableValue); if (!index || contentData.getMimetype() == null) { @@ -909,17 +915,6 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp if (readerReady) { InputStreamReader isr = null; - // InputStream ris = reader.getContentInputStream(); - // try - // { - // isr = new InputStreamReader(ris, "UTF-8"); - // } - // catch (UnsupportedEncodingException e) - // { - // isr = new InputStreamReader(ris); - // } - // doc.add(new Field("TEXT", isr, Field.TermVector.NO)); - InputStream ris = reader.getReader().getContentInputStream(); try { @@ -984,9 +979,85 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp for (Locale locale : mlText.getLocales()) { String localeString = mlText.getValue(locale); - StringBuilder builder = new StringBuilder(); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); - doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); + StringBuilder builder; + MLAnalysisMode analysisMode; + VerbatimAnalyser vba; + MLTokenDuplicator duplicator; + Token t; + switch (tokenise) + { + case TRUE: + builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); + break; + case FALSE: + // analyse ml text + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(localeString)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + } + catch (IOException e) + { + // TODO ?? + } + + break; + case BOTH: + builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); + + // analyse ml text + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(localeString)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + } + catch (IOException e) + { + // TODO ?? + } + + break; + } } } else if (isText) @@ -1013,6 +1084,10 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp locale = I18NUtil.getLocale(); } StringBuilder builder; + MLAnalysisMode analysisMode; + VerbatimAnalyser vba; + MLTokenDuplicator duplicator; + Token t; switch (tokenise) { default: @@ -1020,37 +1095,109 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp builder = new StringBuilder(); builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); - break; case FALSE: - doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(strValue)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + } + catch (IOException e) + { + // TODO ?? + } + break; case BOTH: builder = new StringBuilder(); builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); - doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(strValue)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + } + catch (IOException e) + { + // TODO ?? + } break; } } else if (isDateTime) { - doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); - - SimpleDateFormat df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); - + SimpleDateFormat df; Date date; - try + switch (tokenise) { - date = df.parse(strValue); - doc.add(new Field(attributeName + ".sort", df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + default: + case TRUE: + doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + break; + case FALSE: + df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); + try + { + date = df.parse(strValue); + doc.add(new Field(attributeName + ".sort", df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + catch (ParseException e) + { + // ignore for ordering + } + break; + case BOTH: + doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + + df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); + try + { + date = df.parse(strValue); + doc.add(new Field(attributeName + ".sort", df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + catch (ParseException e) + { + // ignore for ordering + } + break; } - catch (ParseException e) - { - // ignore for ordering - } - } else { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneSearcherImpl.java b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneSearcherImpl.java index 06561b89b2..2e800b3283 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneSearcherImpl.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneSearcherImpl.java @@ -26,22 +26,30 @@ package org.alfresco.repo.search.impl.lucene; import java.io.IOException; import java.io.Serializable; +import java.io.StringReader; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; +import java.util.Locale; import java.util.Map; import java.util.Set; +import org.alfresco.i18n.I18NUtil; import org.alfresco.repo.search.CannedQueryDef; import org.alfresco.repo.search.EmptyResultSet; +import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.QueryRegisterComponent; import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.impl.NodeSearcher; import org.alfresco.repo.search.impl.lucene.QueryParser.Operator; import org.alfresco.repo.search.impl.lucene.analysis.DateTimeAnalyser; +import org.alfresco.repo.search.impl.lucene.analysis.MLTokenDuplicator; +import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser; import org.alfresco.repo.search.results.SortedResultSet; import org.alfresco.repo.tenant.TenantService; import org.alfresco.service.cmr.dictionary.DataTypeDefinition; @@ -68,6 +76,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.search.Hits; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; @@ -269,13 +278,77 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS switch (sd.getSortType()) { case FIELD: + Locale sortLocale = null; String field = sd.getField(); if (field.startsWith("@")) { field = expandAttributeFieldName(field); PropertyDefinition propertyDef = getDictionaryService().getProperty(QName.createQName(field.substring(1))); - if (propertyDef.getDataType().getName().equals(DataTypeDefinition.DATETIME)) + if (propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT)) + { + throw new SearcherException("Order on content properties is not curently supported"); + } + else if ((propertyDef.getDataType().getName().equals(DataTypeDefinition.MLTEXT)) || (propertyDef.getDataType().getName().equals(DataTypeDefinition.TEXT))) + { + List locales = searchParameters.getLocales(); + if (((locales == null) || (locales.size() == 0))) + { + locales = Collections.singletonList(I18NUtil.getLocale()); + } + + if (locales.size() > 1) + { + throw new SearcherException("Order on text/mltext properties with more than one locale is not curently supported"); + } + + sortLocale = locales.get(0); + // find best field match + + MLAnalysisMode analysisMode = getLuceneConfig().getDefaultMLSearchAnalysisMode(); + HashSet allowableLocales = new HashSet(); + for (Locale l : MLAnalysisMode.getLocales(analysisMode, sortLocale, false)) + { + allowableLocales.add(l.toString()); + } + + String sortField = field; + + for (Object current : searcher.getReader().getFieldNames(FieldOption.INDEXED)) + { + String currentString = (String) current; + if (currentString.startsWith(field) && currentString.endsWith(".sort")) + { + String fieldLocale = currentString.substring(field.length() + 1, currentString.length() - 5); + if (allowableLocales.contains(fieldLocale)) + { + if (fieldLocale.equals(sortLocale.toString())) + { + sortField = currentString; + break; + } + else if (sortLocale.toString().startsWith(fieldLocale)) + { + if (sortField.equals(field) || (currentString.length() < sortField.length())) + { + sortField = currentString; + } + } + else if (fieldLocale.startsWith(sortLocale.toString())) + { + if (sortField.equals(field) || (currentString.length() < sortField.length())) + { + sortField = currentString; + } + } + } + } + } + + field = sortField; + + } + else if (propertyDef.getDataType().getName().equals(DataTypeDefinition.DATETIME)) { DataTypeDefinition dataType = propertyDef.getDataType(); String analyserClassName = dataType.getAnalyserClassName(); @@ -292,7 +365,7 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS } if (fieldHasTerm(searcher.getReader(), field)) { - fields[index++] = new SortField(field, !sd.isAscending()); + fields[index++] = new SortField(field, sortLocale, !sd.isAscending()); } else { @@ -309,7 +382,7 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS } hits = searcher.search(query, new Sort(fields)); - + } else { @@ -318,7 +391,7 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS Path[] paths = searchParameters.getAttributePaths().toArray(new Path[0]); ResultSet rs = new LuceneResultSet(hits, searcher, nodeService, tenantService, paths, searchParameters, getLuceneConfig()); - if(requiresPostSort) + if (requiresPostSort) { ResultSet sorted = new SortedResultSet(rs, nodeService, searchParameters, namespacePrefixResolver); return sorted; @@ -359,7 +432,8 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS return new EmptyResultSet(); } Hits hits = searcher.search(query); - return new LuceneResultSet(hits, searcher, nodeService, tenantService, searchParameters.getAttributePaths().toArray(new Path[0]), searchParameters, getLuceneConfig()); + return new LuceneResultSet(hits, searcher, nodeService, tenantService, searchParameters.getAttributePaths().toArray(new Path[0]), searchParameters, + getLuceneConfig()); } catch (SAXPathException e) { @@ -757,7 +831,7 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS Term term = terms.term(); if (term != null) { - if(!term.field().equals(field)) + if (!term.field().equals(field)) { break; } @@ -765,26 +839,26 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS Pair pair = new Pair(term.text(), Integer.valueOf(freq)); if (answer.size() < count) { - if (answer.size() == 0) - { - answer.add(pair); - } - else if (answer.get(answer.size() - 1).getSecond().compareTo(pair.getSecond()) >= 0) - { - answer.add(pair); - } - else - { - for (ListIterator> it = answer.listIterator(); it.hasNext(); /**/) - { - Pair test = it.next(); - if (test.getSecond().compareTo(pair.getSecond()) < 0) - { - it.previous(); - it.add(pair); - break; - } - } + if (answer.size() == 0) + { + answer.add(pair); + } + else if (answer.get(answer.size() - 1).getSecond().compareTo(pair.getSecond()) >= 0) + { + answer.add(pair); + } + else + { + for (ListIterator> it = answer.listIterator(); it.hasNext(); /**/) + { + Pair test = it.next(); + if (test.getSecond().compareTo(pair.getSecond()) < 0) + { + it.previous(); + it.add(pair); + break; + } + } } } else if (answer.get(count - 1).getSecond().compareTo(pair.getSecond()) < 0) @@ -806,7 +880,8 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS // off the end } } - } while (terms.next()); + } + while (terms.next()); terms.close(); return answer; @@ -832,7 +907,9 @@ public class ADMLuceneSearcherImpl extends AbstractLuceneBase implements LuceneS } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.LuceneSearcher#getClosingIndexSearcher() */ public ClosingIndexSearcher getClosingIndexSearcher() diff --git a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneTest.java b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneTest.java index 49956a537a..56d37c4a0e 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneTest.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneTest.java @@ -128,6 +128,10 @@ public class ADMLuceneTest extends TestCase QName orderInt = QName.createQName(TEST_NAMESPACE, "orderInt"); + QName orderText = QName.createQName(TEST_NAMESPACE, "orderText"); + + QName orderMLText = QName.createQName(TEST_NAMESPACE, "orderMLText"); + QName aspectWithChildren = QName.createQName(TEST_NAMESPACE, "aspectWithChildren"); TransactionService transactionService; @@ -422,12 +426,12 @@ public class ADMLuceneTest extends TestCase documentOrder = new NodeRef[] { rootNodeRef, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14, n3, n1, n2 }; -// TODO: Why was the cm:auditable aspect added here? -// By adding it, the auditable properties were set automatically. -// nodeService.addAspect(n3, ContentModel.ASPECT_AUDITABLE, null); -// nodeService.addAspect(n1, ContentModel.ASPECT_AUDITABLE, null); - nodeService.setProperty(n1, ContentModel.PROP_MODIFIED, new Date(new Date().getTime() - 1000*60*60)); -// nodeService.addAspect(n2, ContentModel.ASPECT_AUDITABLE, null); + // TODO: Why was the cm:auditable aspect added here? + // By adding it, the auditable properties were set automatically. + // nodeService.addAspect(n3, ContentModel.ASPECT_AUDITABLE, null); + // nodeService.addAspect(n1, ContentModel.ASPECT_AUDITABLE, null); + nodeService.setProperty(n1, ContentModel.PROP_MODIFIED, new Date(new Date().getTime() - 1000 * 60 * 60)); + // nodeService.addAspect(n2, ContentModel.ASPECT_AUDITABLE, null); } private double orderDoubleCount = -0.11d; @@ -440,6 +444,8 @@ public class ADMLuceneTest extends TestCase private int orderIntCount = -45764576; + private int orderTextCount = 0; + /** * @return properties */ @@ -451,11 +457,20 @@ public class ADMLuceneTest extends TestCase testProperties.put(orderFloat, orderFloatCount); testProperties.put(orderLong, orderLongCount); testProperties.put(orderInt, orderIntCount); + testProperties.put(orderText, new String(new char[] { (char) ('a' + orderTextCount) }) + " cabbage"); + + MLText mlText = new MLText(); + mlText.addValue(Locale.ENGLISH, new String(new char[] { (char) ('a' + orderTextCount) }) + " banana"); + mlText.addValue(Locale.FRENCH, new String(new char[] { (char) ('z' - orderTextCount) }) + " banane"); + mlText.addValue(Locale.CHINESE, new String(new char[] { (char) ('香' + orderTextCount) }) + " 香蕉"); + testProperties.put(orderMLText, mlText); + orderDate = Duration.subtract(orderDate, new Duration("P1D")); orderDoubleCount += 0.1d; orderFloatCount += 0.82f; orderLongCount += 299999999999999l; orderIntCount += 8576457; + orderTextCount++; return testProperties; } @@ -691,7 +706,7 @@ public class ADMLuceneTest extends TestCase private void doBulkTest(int n) throws Exception { SessionSizeResourceManager.setDisableInTransaction(); - + Map testProperties = new HashMap(); testProperties.put(QName.createQName(TEST_NAMESPACE, "text-indexed-stored-tokenised-atomic"), "BULK"); for (int i = 0; i < n; i++) @@ -734,7 +749,7 @@ public class ADMLuceneTest extends TestCase private void getCold(ADMLuceneSearcherImpl searcher, int n) { hibernateL1CacheBulkLoader.clear(); - + long start; long end; @@ -759,10 +774,10 @@ public class ADMLuceneTest extends TestCase System.out.println(n + " Cold in " + ((end - start) / 10e9)); } - + private void getWarm(ADMLuceneSearcherImpl searcher, int n) { - + long start; long end; @@ -787,11 +802,11 @@ public class ADMLuceneTest extends TestCase System.out.println(n + " Warm in " + ((end - start) / 10e9)); } - + private void getCold10(ADMLuceneSearcherImpl searcher, int n) { hibernateL1CacheBulkLoader.clear(); - + long start; long end; @@ -816,11 +831,11 @@ public class ADMLuceneTest extends TestCase System.out.println(n + " Prefetch 10 in " + ((end - start) / 10e9)); } - + private void getCold100(ADMLuceneSearcherImpl searcher, int n) { hibernateL1CacheBulkLoader.clear(); - + long start; long end; @@ -845,11 +860,11 @@ public class ADMLuceneTest extends TestCase System.out.println(n + " Prefetch 100 in " + ((end - start) / 10e9)); } - + private void getCold1000(ADMLuceneSearcherImpl searcher, int n) { hibernateL1CacheBulkLoader.clear(); - + long start; long end; @@ -874,11 +889,11 @@ public class ADMLuceneTest extends TestCase System.out.println(n + " Prefetch 1000 in " + ((end - start) / 10e9)); } - + private void getCold10000(ADMLuceneSearcherImpl searcher, int n) { hibernateL1CacheBulkLoader.clear(); - + long start; long end; @@ -2016,7 +2031,7 @@ public class ADMLuceneTest extends TestCase date = currentBun; } results.close(); - + SearchParameters sp_7 = new SearchParameters(); sp_7.addStore(rootNodeRef.getStoreRef()); sp_7.setLanguage(SearchService.LANGUAGE_LUCENE); @@ -2038,7 +2053,7 @@ public class ADMLuceneTest extends TestCase c.set(Calendar.HOUR_OF_DAY, 0); currentBun = c.getTime(); } - if (date != null) + if ((date != null) && (currentBun != null)) { assertTrue(date.compareTo(currentBun) <= 0); } @@ -2245,16 +2260,116 @@ public class ADMLuceneTest extends TestCase } results.close(); - luceneFTS.resume(); + // sort by text SearchParameters sp17 = new SearchParameters(); sp17.addStore(rootNodeRef.getStoreRef()); sp17.setLanguage(SearchService.LANGUAGE_LUCENE); sp17.setQuery("PATH:\"//.\""); - sp17.addSort("cabbage", false); + sp17.addSort("@" + orderText, true); results = searcher.query(sp17); + + String text = null; + for (ResultSetRow row : results) + { + String currentBun = DefaultTypeConverter.INSTANCE.convert(String.class, nodeService.getProperty(row.getNodeRef(), orderText)); + // System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " "+currentBun); + if ((text != null) && (currentBun != null)) + { + assertTrue(text.compareTo(currentBun) <= 0); + } + text = currentBun; + } results.close(); - + + SearchParameters sp18 = new SearchParameters(); + sp18.addStore(rootNodeRef.getStoreRef()); + sp18.setLanguage(SearchService.LANGUAGE_LUCENE); + sp18.setQuery("PATH:\"//.\""); + sp18.addSort("@" + orderText, false); + results = searcher.query(sp18); + + text = null; + for (ResultSetRow row : results) + { + String currentBun = DefaultTypeConverter.INSTANCE.convert(String.class, nodeService.getProperty(row.getNodeRef(), orderText)); + // System.out.println(currentBun); + if ((text != null) && (currentBun != null)) + { + assertTrue(text.compareTo(currentBun) >= 0); + } + text = currentBun; + } + results.close(); + + // sort by ML text + + Locale[] testLocales = new Locale[] { I18NUtil.getLocale(), Locale.ENGLISH, Locale.FRENCH, Locale.CHINESE }; + for (Locale testLocale : testLocales) + { + + SearchParameters sp19 = new SearchParameters(); + sp19.addStore(rootNodeRef.getStoreRef()); + sp19.setLanguage(SearchService.LANGUAGE_LUCENE); + sp19.setQuery("PATH:\"//.\""); + sp19.addSort("@" + orderMLText, true); + sp19.addLocale(testLocale); + results = searcher.query(sp19); + + text = null; + for (ResultSetRow row : results) + { + MLText mltext = DefaultTypeConverter.INSTANCE.convert(MLText.class, nodeService.getProperty(row.getNodeRef(), orderMLText)); + if (mltext != null) + { + String currentBun = mltext.getValue(testLocale); + // System.out.println( (currentBun == null ? "null" : NumericEncoder.encode(currentBun))+ " + // "+currentBun); + if ((text != null) && (currentBun != null)) + { + assertTrue(text.compareTo(currentBun) <= 0); + } + text = currentBun; + } + } + results.close(); + + SearchParameters sp20 = new SearchParameters(); + sp20.addStore(rootNodeRef.getStoreRef()); + sp20.setLanguage(SearchService.LANGUAGE_LUCENE); + sp20.setQuery("PATH:\"//.\""); + sp20.addSort("@" + orderMLText, false); + sp20.addLocale(testLocale); + results = searcher.query(sp20); + + text = null; + for (ResultSetRow row : results) + { + MLText mltext = DefaultTypeConverter.INSTANCE.convert(MLText.class, nodeService.getProperty(row.getNodeRef(), orderMLText)); + if (mltext != null) + { + String currentBun = mltext.getValue(testLocale); + if ((text != null) && (currentBun != null)) + { + assertTrue(text.compareTo(currentBun) >= 0); + } + text = currentBun; + } + } + results.close(); + + } + + luceneFTS.resume(); + + SearchParameters spN = new SearchParameters(); + spN.addStore(rootNodeRef.getStoreRef()); + spN.setLanguage(SearchService.LANGUAGE_LUCENE); + spN.setQuery("PATH:\"//.\""); + spN.addSort("cabbage", false); + results = searcher.query(spN); + results.close(); + luceneFTS.resume(); } @@ -4480,13 +4595,13 @@ public class ADMLuceneTest extends TestCase results = searcher.query(sp); assertEquals(1, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "null").toString() + "\""); results = searcher.query(sp); - //assertEquals(62, results.length()); + // assertEquals(62, results.length()); results.close(); sp = new SearchParameters(); @@ -4496,13 +4611,13 @@ public class ADMLuceneTest extends TestCase results = searcher.query(sp); assertEquals(0, results.length()); results.close(); - + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "path-ista").toString() + "\""); results = searcher.query(sp); - //assertEquals(61, results.length()); + // assertEquals(61, results.length()); results.close(); sp = new SearchParameters(); @@ -4534,7 +4649,7 @@ public class ADMLuceneTest extends TestCase sp.setLanguage("lucene"); sp.setQuery("ISNULL:\"" + QName.createQName(TEST_NAMESPACE, "aspectProperty").toString() + "\""); results = searcher.query(sp); - //assertEquals(62, results.length()); + // assertEquals(62, results.length()); results.close(); sp = new SearchParameters(); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml index 2ff0340c1d..fe377b1e5f 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml @@ -68,6 +68,26 @@ true true + + + d:text + true + false + + true + true + both + + + + d:mltext + true + false + + true + true + both +