SEARCH-2529: Allows configuration for getting the path information for a node in batches or as single nodes

New solrcore.properties property available: alfresco.metadata.getPathsInNodeBatches=true
This commit is contained in:
Angel Borroy
2020-11-03 10:41:38 +01:00
parent 5922f61233
commit 82e56411bf
2 changed files with 44 additions and 4 deletions

View File

@@ -408,6 +408,10 @@ public class SolrInformationServer implements InformationServer
private long cleanContentLastPurged;
// Get Paths information from Repository for a batch of nodes (true by default)
// When false, Paths information is only recovered for single nodes
private final boolean getPathsInNodeBatches;
// Metadata pulling control
private boolean skipDescendantDocsForSpecificTypes;
private boolean skipDescendantDocsForSpecificAspects;
@@ -605,6 +609,8 @@ public class SolrInformationServer implements InformationServer
contentStreamLimit = Integer.parseInt(coreConfiguration.getProperty("alfresco.contentStreamLimit", "10000000"));
getPathsInNodeBatches = Boolean.parseBoolean(coreConfiguration.getProperty("alfresco.metadata.getPathsInNodeBatches", "true"));
props = AlfrescoSolrDataModel.getCommonConfig();
hostName = ConfigUtil.locateProperty(SOLR_HOST, props.getProperty(SOLR_HOST));
@@ -2013,6 +2019,9 @@ public class SolrInformationServer implements InformationServer
nmdp.setNodeIds(nodeIds);
nmdp.setIncludeChildIds(false);
nmdp.setIncludeChildAssociations(false);
// Ancestor (path) information retrieved for a whole batch of nodes can be large
// and may have to be kept in memory for a long time, so including it is configurable.
nmdp.setIncludePaths(getPathsInNodeBatches);
// Fetches bulk metadata
nmdp.setMaxResults(Integer.MAX_VALUE);
@@ -2160,10 +2169,18 @@ public class SolrInformationServer implements InformationServer
if (cascadeTrackingEnabled())
{
updatePathRelatedFields(metadata, doc);
updateNamePathRelatedFields(metadata, doc);
updateAncestorRelatedFields(metadata, doc);
doc.setField(FIELD_PARENT_ASSOC_CRC, metadata.getParentAssocsCrc());
// metadata must remain effectively final because it is used by the lambdas above, so a new variable is needed here
NodeMetaData extendedMetadata = metadata;
// Ancestor information was not recovered for node batches, so we need to update
// the node with that information before updating the SOLR Document
if (!getPathsInNodeBatches)
{
extendedMetadata = getNodeMetaDataWithPathInfo(metadata.getId());
}
updatePathRelatedFields(extendedMetadata, doc);
updateNamePathRelatedFields(extendedMetadata, doc);
updateAncestorRelatedFields(extendedMetadata, doc);
doc.setField(FIELD_PARENT_ASSOC_CRC, extendedMetadata.getParentAssocsCrc());
}
ofNullable(metadata.getOwner()).ifPresent(owner -> doc.setField(FIELD_OWNER, owner));
@@ -2207,6 +2224,23 @@ public class SolrInformationServer implements InformationServer
});
}
/**
 * Gets the metadata for a single node from the repository, including Paths information.
 * Paths information can be huge in some scenarios, so requesting it one node at a time
 * needs less memory than requesting it for a whole batch of nodes.
 *
 * @param nodeId Id for the node to get information from repository
 * @return Metadata information for the node, including Paths information
 * @throws java.util.NoSuchElementException if no metadata could be obtained for the node
 */
private NodeMetaData getNodeMetaDataWithPathInfo(long nodeId)
{
    // Request only this node: from/to are both nodeId and at most one result is asked for.
    NodeMetaDataParameters nmdp = new NodeMetaDataParameters();
    nmdp.setFromNodeId(nodeId);
    nmdp.setToNodeId(nodeId);
    nmdp.setIncludePaths(true);
    nmdp.setMaxResults(1);

    // Same exception type as the previous unchecked get()/next() chain, but the message
    // now names the node so a failed lookup can be diagnosed from the logs.
    // NOTE(review): assumes getNodesMetaDataFromRepository returns a java.util.Optional - confirm.
    java.util.Iterator<? extends NodeMetaData> results = getNodesMetaDataFromRepository(nmdp)
            .orElseThrow(() -> new java.util.NoSuchElementException(
                    "No metadata could be retrieved from the repository for node " + nodeId))
            .iterator();
    if (!results.hasNext())
    {
        throw new java.util.NoSuchElementException(
                "The repository returned no metadata entries for node " + nodeId);
    }
    return results.next();
}
private void updateAncestorRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc)
{
doc.removeField(FIELD_ANCESTOR);

View File

@@ -183,6 +183,12 @@ alfresco.metadata.ignore.datatype.1=app:configurations
alfresco.metadata.skipDescendantDocsForSpecificAspects=false
#alfresco.metadata.ignore.aspect.0=
# If you are experiencing OOM errors, your Paths information is probably too large to be
# retrieved for node batches. You can change this property to "false" in order to get
# paths information only for single nodes. Be aware that when using "false", memory requirements
# are lower but indexing performance is also slower.
alfresco.metadata.getPathsInNodeBatches=true
# Date/Datetime fields only: if this property is set to true (default value) each date/datetime field
#
# - will be indexed as a whole value