From 01deeef511287118e56ba38d83e5bd374b6ec8bd Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Wed, 17 Dec 2008 12:29:54 +0000 Subject: [PATCH] Build fix 1) for tokenisation = false 2) so d:datetime properties index as expected git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@12444 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- config/alfresco/model/calendarModel.xml | 20 ++ config/alfresco/model/contentModel.xml | 45 ++++ config/alfresco/model/systemModel.xml | 5 + config/alfresco/model/wcmAppModel.xml | 20 ++ config/alfresco/workflow/wcmWorkflowModel.xml | 5 + .../impl/lucene/ADMLuceneIndexerImpl.java | 10 +- .../impl/lucene/AVMLuceneIndexerImpl.java | 218 ++++++++++++++++-- .../security/authentication/userModel.xml | 10 + .../alfresco/repo/version/version2_model.xml | 15 ++ 9 files changed, 322 insertions(+), 26 deletions(-) diff --git a/config/alfresco/model/calendarModel.xml b/config/alfresco/model/calendarModel.xml index fd19a88046..0c96e19a2e 100644 --- a/config/alfresco/model/calendarModel.xml +++ b/config/alfresco/model/calendarModel.xml @@ -29,9 +29,19 @@ d:datetime + + true + false + both + d:datetime + + true + false + both + d:text @@ -54,9 +64,19 @@ d:datetime + + true + false + both + d:datetime + + true + false + both + d:text diff --git a/config/alfresco/model/contentModel.xml b/config/alfresco/model/contentModel.xml index 099bbc3f9c..6cabfc954b 100644 --- a/config/alfresco/model/contentModel.xml +++ b/config/alfresco/model/contentModel.xml @@ -404,6 +404,11 @@ d:datetime true true + + true + false + both + Creator @@ -416,6 +421,11 @@ d:datetime true true + + true + false + both + Modifier @@ -427,6 +437,11 @@ Accessed d:datetime true + + true + false + both + @@ -490,6 +505,11 @@ Remove After d:datetime + + true + false + both + @@ -650,10 +670,20 @@ Effective From d:datetime + + true + false + both + Effective To d:datetime + + true + false + both + @@ -862,6 +892,11 @@ Sent Date d:datetime + + true + false + both + @@ -949,10 +984,20 @@ Published d:datetime + + true + false + both + Updated d:datetime + + true + false + both + diff --git a/config/alfresco/model/systemModel.xml b/config/alfresco/model/systemModel.xml index 5ecfb884c1..aba1b51b9b 100644 --- a/config/alfresco/model/systemModel.xml +++ b/config/alfresco/model/systemModel.xml @@ -160,6 +160,11 @@ d:datetime true + + true + false + both + d:text diff --git a/config/alfresco/model/wcmAppModel.xml b/config/alfresco/model/wcmAppModel.xml index 9909aa1715..6f26c7e8aa 100644 --- a/config/alfresco/model/wcmAppModel.xml +++ b/config/alfresco/model/wcmAppModel.xml @@ -396,6 +396,11 @@ Time Of Deploy Attempt d:datetime true + + true + false + both + @@ -431,11 +436,21 @@ Time Deploy Started d:datetime true + + true + false + both + Time Deploy Finished d:datetime true + + true + false + both + Deployment Successful @@ -624,6 +639,11 @@ Expiration Date d:datetime true + + true + false + both + diff --git a/config/alfresco/workflow/wcmWorkflowModel.xml b/config/alfresco/workflow/wcmWorkflowModel.xml index 11628e98fe..402b293116 100644 --- a/config/alfresco/workflow/wcmWorkflowModel.xml +++ b/config/alfresco/workflow/wcmWorkflowModel.xml @@ -238,6 +238,11 @@ Launch Date Date the content in the submission should be committed d:datetime + + true + false + both + Validate Links diff --git a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java index 50a276a874..7a813cb9a1 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/ADMLuceneIndexerImpl.java @@ -1014,6 +1014,9 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp { doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); } + + doc.add(new Field(attributeName, t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } } catch (IOException e) @@ -1083,6 +1086,7 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp { locale = I18NUtil.getLocale(); } + StringBuilder builder; MLAnalysisMode analysisMode; VerbatimAnalyser vba; @@ -1118,6 +1122,8 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp { doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); } + + doc.add(new Field(attributeName, t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); } } catch (IOException e) @@ -1176,7 +1182,7 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp try { date = df.parse(strValue); - doc.add(new Field(attributeName + ".sort", df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + doc.add(new Field(attributeName, df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); } catch (ParseException e) { @@ -1185,7 +1191,7 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl imp break; case BOTH: doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); - + df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); try { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/AVMLuceneIndexerImpl.java b/source/java/org/alfresco/repo/search/impl/lucene/AVMLuceneIndexerImpl.java index f0bed4513a..4dddf1fa98 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/AVMLuceneIndexerImpl.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/AVMLuceneIndexerImpl.java @@ -54,7 +54,10 @@ import org.alfresco.repo.content.transform.ContentTransformer; import org.alfresco.repo.dictionary.IndexTokenisationMode; import org.alfresco.repo.domain.PropertyValue; import org.alfresco.repo.search.IndexMode; +import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.impl.lucene.analysis.DateTimeAnalyser; +import org.alfresco.repo.search.impl.lucene.analysis.MLTokenDuplicator; +import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser; import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; import org.alfresco.repo.transaction.AlfrescoTransactionSupport; @@ -85,6 +88,7 @@ import org.alfresco.util.ISO9075; import org.alfresco.util.Pair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.lucene.analysis.Token; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; @@ -739,6 +743,8 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl impl if (isContent) { + // Content is always tokenised + ContentData contentData = DefaultTypeConverter.INSTANCE.convert(ContentData.class, serializableValue); if (!index || contentData.getMimetype() == null) { @@ -815,6 +821,12 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl impl transformer.transform(reader, writer); // point the reader to the new-written content reader = writer.getReader(); + // Check that the reader is a view onto something concrete + if (!reader.exists()) + { + throw new ContentIOException("The transformation did not write any content, yet: \n" + + " transformer: " + transformer + "\n" + " temp writer: " + writer); + } } catch (ContentIOException e) { @@ -899,9 +911,88 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl impl for (Locale locale : mlText.getLocales()) { String localeString = mlText.getValue(locale); - StringBuilder builder = new StringBuilder(); - builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); - doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); + StringBuilder builder; + MLAnalysisMode analysisMode; + VerbatimAnalyser vba; + MLTokenDuplicator duplicator; + Token t; + switch (tokenise) + { + case TRUE: + builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); + break; + case FALSE: + // analyse ml text + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(localeString)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName, t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + + doc.add(new Field(attributeName, t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + + } + } + catch (IOException e) + { + // TODO ?? + } + + break; + case BOTH: + builder = new StringBuilder(); + builder.append("\u0000").append(locale.toString()).append("\u0000").append(localeString); + doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); + + // analyse ml text + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(localeString)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + } + catch (IOException e) + { + // TODO ?? + } + + break; + } } } else if (isText) @@ -929,42 +1020,121 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl impl } StringBuilder builder; - switch(tokenise) + MLAnalysisMode analysisMode; + VerbatimAnalyser vba; + MLTokenDuplicator duplicator; + Token t; + switch (tokenise) { default: case TRUE: - builder = new StringBuilder(); + builder = new StringBuilder(); builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); - break; + break; case FALSE: - doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); - break; + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(strValue)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName, t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + + doc.add(new Field(attributeName, t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + catch (IOException e) + { + // TODO ?? + } + + break; case BOTH: - builder = new StringBuilder(); + builder = new StringBuilder(); builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue); doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex, Field.TermVector.NO)); - - doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); - break; + + analysisMode = getLuceneConfig().getDefaultMLIndexAnalysisMode(); + // Do the analysis here + vba = new VerbatimAnalyser(false); + duplicator = new MLTokenDuplicator(vba.tokenStream(attributeName, new StringReader(strValue)), locale, null, analysisMode); + try + { + while ((t = duplicator.next()) != null) + { + String localeText = ""; + if (t.termText().indexOf('{') == 0) + { + int end = t.termText().indexOf('}', 1); + if (end != -1) + { + localeText = t.termText().substring(1, end); + } + } + if (localeText.length() > 0) + { + doc.add(new Field(attributeName + "." + localeText + ".sort", t.termText(), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + } + } + catch (IOException e) + { + // TODO ?? + } + break; } } else if (isDateTime) { - - doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); - - SimpleDateFormat df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); - + SimpleDateFormat df; Date date; - try + switch (tokenise) { - date = df.parse(strValue); - doc.add(new Field(attributeName + ".sort", df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); - } - catch (ParseException e) - { - // ignore for ordering + default: + case TRUE: + doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + break; + case FALSE: + df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); + try + { + date = df.parse(strValue); + doc.add(new Field(attributeName, df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + catch (ParseException e) + { + // ignore for ordering + } + break; + case BOTH: + doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); + + df = CachingDateFormat.getDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", true); + try + { + date = df.parse(strValue); + doc.add(new Field(attributeName + ".sort", df.format(date), Field.Store.NO, Field.Index.NO_NORMS, Field.TermVector.NO)); + } + catch (ParseException e) + { + // ignore for ordering + } + break; } } else diff --git a/source/java/org/alfresco/repo/security/authentication/userModel.xml b/source/java/org/alfresco/repo/security/authentication/userModel.xml index 18e981c3fa..53b765ad7f 100644 --- a/source/java/org/alfresco/repo/security/authentication/userModel.xml +++ b/source/java/org/alfresco/repo/security/authentication/userModel.xml @@ -49,12 +49,22 @@ d:datetime + + true + false + both + d:boolean d:datetime + + true + false + both + d:boolean diff --git a/source/java/org/alfresco/repo/version/version2_model.xml b/source/java/org/alfresco/repo/version/version2_model.xml index e5136147f9..b5aebf5fb4 100644 --- a/source/java/org/alfresco/repo/version/version2_model.xml +++ b/source/java/org/alfresco/repo/version/version2_model.xml @@ -109,6 +109,11 @@ Created d:datetime true + + true + false + both + Creator @@ -119,6 +124,11 @@ Modified d:datetime true + + true + false + both + Modifier @@ -129,6 +139,11 @@ Accessed d:datetime true + + true + false + both +