mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-08-07 17:49:17 +00:00
ALF-9413: RSOLR 022: Fine-grained control of full-text indexing
- final part - supported in SOLR, added aspect support to explorer and share

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@29192 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -327,3 +327,9 @@ cm_contentmodel.property.exif_yResolution.description=Vertical resolution in pix
|
|||||||
cm_contentmodel.property.exif_resolutionUnit.title=Resolution Unit
|
cm_contentmodel.property.exif_resolutionUnit.title=Resolution Unit
|
||||||
cm_contentmodel.property.exif_resolutionUnit.description=Unit used for horizontal and vertical resolution
|
cm_contentmodel.property.exif_resolutionUnit.description=Unit used for horizontal and vertical resolution
|
||||||
|
|
||||||
|
cm_contentmodel.aspect.cm_indexControl.title=Index Control
|
||||||
|
cm_contentmodel.aspect.cm_indexControl.description=Control Index Behaviour
|
||||||
|
cm_contentmodel.property.cm_isIndexed.title=Is Indexed
|
||||||
|
cm_contentmodel.property.cm_isIndexed.description=Is the node indexed and can be found via search.
|
||||||
|
cm_contentmodel.property.cm_isContentIndexed.title=Is Content Indexed
|
||||||
|
cm_contentmodel.property.cm_isContentIndexed.description=Are the node's d:content properties indexed?
|
||||||
|
@@ -1419,10 +1419,12 @@
|
|||||||
<property name="cm:isIndexed">
|
<property name="cm:isIndexed">
|
||||||
<title>Is indexed</title>
|
<title>Is indexed</title>
|
||||||
<type>d:boolean</type>
|
<type>d:boolean</type>
|
||||||
|
<default>true</default>
|
||||||
</property>
|
</property>
|
||||||
<property name="cm:isContentIndexed">
|
<property name="cm:isContentIndexed">
|
||||||
<title>Is content indexed</title>
|
<title>Is content indexed</title>
|
||||||
<type>d:boolean</type>
|
<type>d:boolean</type>
|
||||||
|
<default>true</default>
|
||||||
</property>
|
</property>
|
||||||
</properties>
|
</properties>
|
||||||
</aspect>
|
</aspect>
|
||||||
|
@@ -476,16 +476,44 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<String> impl
|
|||||||
boolean isAtomic = true;
|
boolean isAtomic = true;
|
||||||
|
|
||||||
Map<QName, Serializable> properties = getIndexableProperties(desc, nodeRef, endVersion, stringNodeRef);
|
Map<QName, Serializable> properties = getIndexableProperties(desc, nodeRef, endVersion, stringNodeRef);
|
||||||
|
|
||||||
|
if(properties.containsKey(ContentModel.PROP_IS_INDEXED))
|
||||||
|
{
|
||||||
|
Serializable sValue = properties.get(ContentModel.PROP_IS_INDEXED);
|
||||||
|
if(sValue != null)
|
||||||
|
{
|
||||||
|
Boolean isIndexed = DefaultTypeConverter.INSTANCE.convert(Boolean.class, sValue);
|
||||||
|
if((isIndexed != null) && (isIndexed.booleanValue() == false))
|
||||||
|
{
|
||||||
|
return docs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isContentIndexedForNode = true;
|
||||||
|
if(properties.containsKey(ContentModel.PROP_IS_CONTENT_INDEXED))
|
||||||
|
{
|
||||||
|
Serializable sValue = properties.get(ContentModel.PROP_IS_CONTENT_INDEXED);
|
||||||
|
if(sValue != null)
|
||||||
|
{
|
||||||
|
Boolean isIndexed = DefaultTypeConverter.INSTANCE.convert(Boolean.class, sValue);
|
||||||
|
if((isIndexed != null) && (isIndexed.booleanValue() == false))
|
||||||
|
{
|
||||||
|
isContentIndexedForNode = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (QName propertyName : properties.keySet())
|
for (QName propertyName : properties.keySet())
|
||||||
{
|
{
|
||||||
Serializable value = properties.get(propertyName);
|
Serializable value = properties.get(propertyName);
|
||||||
if (indexAllProperties)
|
if (indexAllProperties)
|
||||||
{
|
{
|
||||||
indexProperty(nodeRef, propertyName, value, xdoc, false, properties);
|
indexProperty(nodeRef, propertyName, value, xdoc, false, properties, isContentIndexedForNode);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
isAtomic &= indexProperty(nodeRef, propertyName, value, xdoc, true, properties);
|
isAtomic &= indexProperty(nodeRef, propertyName, value, xdoc, true, properties,isContentIndexedForNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -722,7 +750,7 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<String> impl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean indexProperty(NodeRef banana, QName propertyName, Serializable value, Document doc, boolean indexAtomicPropertiesOnly, Map<QName, Serializable> properties)
|
protected boolean indexProperty(NodeRef banana, QName propertyName, Serializable value, Document doc, boolean indexAtomicPropertiesOnly, Map<QName, Serializable> properties, boolean isContentIndexedForNode)
|
||||||
{
|
{
|
||||||
String attributeName = "@" + QName.createQName(propertyName.getNamespaceURI(), ISO9075.encode(propertyName.getLocalName()));
|
String attributeName = "@" + QName.createQName(propertyName.getNamespaceURI(), ISO9075.encode(propertyName.getLocalName()));
|
||||||
|
|
||||||
@@ -811,119 +839,127 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<String> impl
|
|||||||
}
|
}
|
||||||
doc.add(new Field(attributeName + ".locale", locale.toString().toLowerCase(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
|
doc.add(new Field(attributeName + ".locale", locale.toString().toLowerCase(), Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
|
||||||
|
|
||||||
ContentReader reader = null;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
reader = contentService.getRawReader(contentData.getContentUrl());
|
|
||||||
reader.setEncoding(contentData.getEncoding());
|
|
||||||
reader.setLocale(contentData.getLocale());
|
|
||||||
reader.setMimetype(contentData.getMimetype());
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
reader = null;
|
|
||||||
}
|
|
||||||
// ContentReader reader = contentService.getReader(banana, propertyName);
|
|
||||||
if (reader != null && reader.exists())
|
|
||||||
{
|
|
||||||
boolean readerReady = true;
|
|
||||||
// transform if necessary (it is not a UTF-8 text document)
|
|
||||||
if (!EqualsHelper.nullSafeEquals(reader.getMimetype(), MimetypeMap.MIMETYPE_TEXT_PLAIN) || !EqualsHelper.nullSafeEquals(reader.getEncoding(), "UTF-8"))
|
|
||||||
{
|
|
||||||
// get the transformer
|
|
||||||
ContentTransformer transformer = contentService.getTransformer(reader.getMimetype(), MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
|
||||||
// is this transformer good enough?
|
|
||||||
if (transformer == null)
|
|
||||||
{
|
|
||||||
// log it
|
|
||||||
if (s_logger.isDebugEnabled())
|
|
||||||
{
|
|
||||||
s_logger.debug("Not indexed: No transformation: \n" + " source: " + reader + "\n" + " target: " + MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
|
||||||
}
|
|
||||||
// don't index from the reader
|
|
||||||
readerReady = false;
|
|
||||||
// not indexed: no transformation
|
|
||||||
// doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO,
|
|
||||||
// Field.Index.TOKENIZED, Field.TermVector.NO));
|
|
||||||
doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
|
|
||||||
}
|
|
||||||
// else if (indexAtomicPropertiesOnly
|
|
||||||
// && transformer.getTransformationTime() > maxAtomicTransformationTime)
|
|
||||||
// {
|
|
||||||
// only indexing atomic properties
|
|
||||||
// indexing will take too long, so push it to the background
|
|
||||||
// wereAllAtomic = false;
|
|
||||||
// }
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// We have a transformer that is fast enough
|
|
||||||
ContentWriter writer = contentService.getTempWriter();
|
|
||||||
writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
|
||||||
// this is what the analyzers expect on the stream
|
|
||||||
writer.setEncoding("UTF-8");
|
|
||||||
try
|
|
||||||
{
|
|
||||||
|
|
||||||
transformer.transform(reader, writer);
|
if(isContentIndexedForNode)
|
||||||
// point the reader to the new-written content
|
{
|
||||||
reader = writer.getReader();
|
ContentReader reader = null;
|
||||||
// Check that the reader is a view onto something concrete
|
try
|
||||||
if (!reader.exists())
|
{
|
||||||
{
|
reader = contentService.getRawReader(contentData.getContentUrl());
|
||||||
throw new ContentIOException("The transformation did not write any content, yet: \n"
|
reader.setEncoding(contentData.getEncoding());
|
||||||
+ " transformer: " + transformer + "\n" + " temp writer: " + writer);
|
reader.setLocale(contentData.getLocale());
|
||||||
}
|
reader.setMimetype(contentData.getMimetype());
|
||||||
}
|
}
|
||||||
catch (ContentIOException e)
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
reader = null;
|
||||||
|
}
|
||||||
|
// ContentReader reader = contentService.getReader(banana, propertyName);
|
||||||
|
if (reader != null && reader.exists())
|
||||||
|
{
|
||||||
|
boolean readerReady = true;
|
||||||
|
// transform if necessary (it is not a UTF-8 text document)
|
||||||
|
if (!EqualsHelper.nullSafeEquals(reader.getMimetype(), MimetypeMap.MIMETYPE_TEXT_PLAIN) || !EqualsHelper.nullSafeEquals(reader.getEncoding(), "UTF-8"))
|
||||||
|
{
|
||||||
|
// get the transformer
|
||||||
|
ContentTransformer transformer = contentService.getTransformer(reader.getMimetype(), MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||||
|
// is this transformer good enough?
|
||||||
|
if (transformer == null)
|
||||||
{
|
{
|
||||||
// log it
|
// log it
|
||||||
if (s_logger.isDebugEnabled())
|
if (s_logger.isDebugEnabled())
|
||||||
{
|
{
|
||||||
s_logger.debug("Not indexed: Transformation failed", e);
|
s_logger.debug("Not indexed: No transformation: \n" + " source: " + reader + "\n" + " target: " + MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||||
}
|
}
|
||||||
// don't index from the reader
|
// don't index from the reader
|
||||||
readerReady = false;
|
readerReady = false;
|
||||||
// not indexed: transformation
|
// not indexed: no transformation
|
||||||
// failed
|
// doc.add(new Field("TEXT", NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO,
|
||||||
// doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO,
|
|
||||||
// Field.Index.TOKENIZED, Field.TermVector.NO));
|
// Field.Index.TOKENIZED, Field.TermVector.NO));
|
||||||
doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
|
doc.add(new Field(attributeName, NOT_INDEXED_NO_TRANSFORMATION, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
|
||||||
|
}
|
||||||
|
// else if (indexAtomicPropertiesOnly
|
||||||
|
// && transformer.getTransformationTime() > maxAtomicTransformationTime)
|
||||||
|
// {
|
||||||
|
// only indexing atomic properties
|
||||||
|
// indexing will take too long, so push it to the background
|
||||||
|
// wereAllAtomic = false;
|
||||||
|
// }
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// We have a transformer that is fast enough
|
||||||
|
ContentWriter writer = contentService.getTempWriter();
|
||||||
|
writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||||
|
// this is what the analyzers expect on the stream
|
||||||
|
writer.setEncoding("UTF-8");
|
||||||
|
try
|
||||||
|
{
|
||||||
|
|
||||||
|
transformer.transform(reader, writer);
|
||||||
|
// point the reader to the new-written content
|
||||||
|
reader = writer.getReader();
|
||||||
|
// Check that the reader is a view onto something concrete
|
||||||
|
if (!reader.exists())
|
||||||
|
{
|
||||||
|
throw new ContentIOException("The transformation did not write any content, yet: \n"
|
||||||
|
+ " transformer: " + transformer + "\n" + " temp writer: " + writer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (ContentIOException e)
|
||||||
|
{
|
||||||
|
// log it
|
||||||
|
if (s_logger.isDebugEnabled())
|
||||||
|
{
|
||||||
|
s_logger.debug("Not indexed: Transformation failed", e);
|
||||||
|
}
|
||||||
|
// don't index from the reader
|
||||||
|
readerReady = false;
|
||||||
|
// not indexed: transformation
|
||||||
|
// failed
|
||||||
|
// doc.add(new Field("TEXT", NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO,
|
||||||
|
// Field.Index.TOKENIZED, Field.TermVector.NO));
|
||||||
|
doc.add(new Field(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// add the text field using the stream from the
|
||||||
|
// reader, but only if the reader is valid
|
||||||
|
if (readerReady)
|
||||||
|
{
|
||||||
|
InputStreamReader isr = null;
|
||||||
|
InputStream ris = reader.getReader().getContentInputStream();
|
||||||
|
try
|
||||||
|
{
|
||||||
|
isr = new InputStreamReader(ris, "UTF-8");
|
||||||
|
}
|
||||||
|
catch (UnsupportedEncodingException e)
|
||||||
|
{
|
||||||
|
isr = new InputStreamReader(ris);
|
||||||
|
}
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append("\u0000").append(locale.toString()).append("\u0000");
|
||||||
|
StringReader prefix = new StringReader(builder.toString());
|
||||||
|
Reader multiReader = new MultiReader(prefix, isr);
|
||||||
|
doc.add(new Field(attributeName, multiReader, Field.TermVector.NO));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// add the text field using the stream from the
|
else
|
||||||
// reader, but only if the reader is valid
|
// URL not present (null reader) or no content at the URL (file missing)
|
||||||
if (readerReady)
|
|
||||||
{
|
{
|
||||||
InputStreamReader isr = null;
|
// log it
|
||||||
InputStream ris = reader.getReader().getContentInputStream();
|
if (s_logger.isDebugEnabled())
|
||||||
try
|
|
||||||
{
|
{
|
||||||
isr = new InputStreamReader(ris, "UTF-8");
|
s_logger.debug("Not indexed: Content Missing \n"
|
||||||
|
+ " node: " + banana + "\n" + " reader: " + reader + "\n" + " content exists: "
|
||||||
|
+ (reader == null ? " --- " : Boolean.toString(reader.exists())));
|
||||||
}
|
}
|
||||||
catch (UnsupportedEncodingException e)
|
// not indexed: content missing
|
||||||
{
|
doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
|
||||||
isr = new InputStreamReader(ris);
|
|
||||||
}
|
|
||||||
StringBuilder builder = new StringBuilder();
|
|
||||||
builder.append("\u0000").append(locale.toString()).append("\u0000");
|
|
||||||
StringReader prefix = new StringReader(builder.toString());
|
|
||||||
Reader multiReader = new MultiReader(prefix, isr);
|
|
||||||
doc.add(new Field(attributeName, multiReader, Field.TermVector.NO));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
// URL not present (null reader) or no content at the URL (file missing)
|
|
||||||
{
|
{
|
||||||
// log it
|
return true;
|
||||||
if (s_logger.isDebugEnabled())
|
|
||||||
{
|
|
||||||
s_logger.debug("Not indexed: Content Missing \n"
|
|
||||||
+ " node: " + banana + "\n" + " reader: " + reader + "\n" + " content exists: "
|
|
||||||
+ (reader == null ? " --- " : Boolean.toString(reader.exists())));
|
|
||||||
}
|
|
||||||
// not indexed: content missing
|
|
||||||
doc.add(new Field(attributeName, NOT_INDEXED_CONTENT_MISSING, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -1871,7 +1907,7 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<String> impl
|
|||||||
{
|
{
|
||||||
indexedDocCount++;
|
indexedDocCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getLastIndexedSnapshot(String store)
|
public int getLastIndexedSnapshot(String store)
|
||||||
{
|
{
|
||||||
int last = getLastAsynchronousSnapshot(store);
|
int last = getLastAsynchronousSnapshot(store);
|
||||||
@@ -1886,7 +1922,7 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<String> impl
|
|||||||
}
|
}
|
||||||
return hasIndexBeenCreated(store) ? 0 : -1;
|
return hasIndexBeenCreated(store) ? 0 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getLastSynchronousSnapshot(String store)
|
private int getLastSynchronousSnapshot(String store)
|
||||||
{
|
{
|
||||||
int answer = getLastSynchronousSnapshot(store, IndexChannel.DELTA);
|
int answer = getLastSynchronousSnapshot(store, IndexChannel.DELTA);
|
||||||
@@ -2110,7 +2146,7 @@ public class AVMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<String> impl
|
|||||||
*/
|
*/
|
||||||
public void deleteIndex(StoreRef storeRef)
|
public void deleteIndex(StoreRef storeRef)
|
||||||
{
|
{
|
||||||
deleteIndex();
|
deleteIndex();
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user