SEARCH-2529: Allows configuration for getting the path information for a node in batches or as single nodes

New property available in solrcore.properties: alfresco.metadata.getPathsInNodeBatches=true
2025-09-17 14:21:20 +00:00 · 2020-11-03 10:41:38 +01:00
parent 5922f61233
commit 82e56411bf
2 changed files with 44 additions and 4 deletions
--- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java
+++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java
@@ -408,6 +408,10 @@ public class SolrInformationServer implements InformationServer

    private long cleanContentLastPurged;

+    // Whether Paths information is fetched from the Repository together with each batch of nodes (true by default)
+    // When false, Paths information is retrieved separately, one node at a time
+    private final boolean getPathsInNodeBatches;
+    
    // Metadata pulling control
    private boolean skipDescendantDocsForSpecificTypes;
    private boolean skipDescendantDocsForSpecificAspects;
@@ -605,6 +609,8 @@ public class SolrInformationServer implements InformationServer

        contentStreamLimit = Integer.parseInt(coreConfiguration.getProperty("alfresco.contentStreamLimit", "10000000"));

+        getPathsInNodeBatches = Boolean.parseBoolean(coreConfiguration.getProperty("alfresco.metadata.getPathsInNodeBatches", "true"));
+
        props = AlfrescoSolrDataModel.getCommonConfig();
        hostName = ConfigUtil.locateProperty(SOLR_HOST, props.getProperty(SOLR_HOST));

@@ -2013,6 +2019,9 @@ public class SolrInformationServer implements InformationServer
                nmdp.setNodeIds(nodeIds);
                nmdp.setIncludeChildIds(false);
                nmdp.setIncludeChildAssociations(false);
+                // Paths (ancestor) information fetched for a whole batch of nodes can be large
+                // and is held in memory for a long time, so fetching it here is configurable.
+                nmdp.setIncludePaths(getPathsInNodeBatches);

                // Fetches bulk metadata
                nmdp.setMaxResults(Integer.MAX_VALUE);
@@ -2160,10 +2169,18 @@ public class SolrInformationServer implements InformationServer

        if (cascadeTrackingEnabled())
        {
-            updatePathRelatedFields(metadata, doc);
-            updateNamePathRelatedFields(metadata, doc);
-            updateAncestorRelatedFields(metadata, doc);
-            doc.setField(FIELD_PARENT_ASSOC_CRC, metadata.getParentAssocsCrc());
+            // As metadata is captured by the lambdas above it must stay effectively final, so we need a new variable here
+            NodeMetaData extendedMetadata = metadata;
+            // Paths information was not retrieved with the node batch, so we need to fetch the
+            // node metadata again, including Paths, before updating the SOLR Document
+            if (!getPathsInNodeBatches)
+            {
+                extendedMetadata = getNodeMetaDataWithPathInfo(metadata.getId());
+            }
+            updatePathRelatedFields(extendedMetadata, doc);
+            updateNamePathRelatedFields(extendedMetadata, doc);
+            updateAncestorRelatedFields(extendedMetadata, doc);
+            doc.setField(FIELD_PARENT_ASSOC_CRC, extendedMetadata.getParentAssocsCrc());
        }

        ofNullable(metadata.getOwner()).ifPresent(owner -> doc.setField(FIELD_OWNER, owner));
@@ -2207,6 +2224,23 @@ public class SolrInformationServer implements InformationServer
        });
    }

+    /**
+     * Gets metadata for a single node from the repository, including Paths information.
+     * Paths information can be huge in some scenarios, so fetching it one node at a time
+     * holds less of it in memory than fetching it for a whole batch of nodes.
+     * @param nodeId Id for the node to get information from repository
+     * @return Metadata for the node. NOTE(review): get()/next() are unchecked, confirm a result is always returned
+     */
+    private NodeMetaData getNodeMetaDataWithPathInfo(long nodeId)
+    {
+        NodeMetaDataParameters nmdp = new NodeMetaDataParameters();
+        nmdp.setFromNodeId(nodeId);
+        nmdp.setToNodeId(nodeId);
+        nmdp.setIncludePaths(true);
+        nmdp.setMaxResults(1);
+        return getNodesMetaDataFromRepository(nmdp).get().iterator().next();
+    }
+    
    private void updateAncestorRelatedFields(NodeMetaData nodeMetaData, SolrInputDocument doc)
    {
        doc.removeField(FIELD_ANCESTOR);
--- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties
+++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties
@@ -183,6 +183,12 @@ alfresco.metadata.ignore.datatype.1=app:configurations
 alfresco.metadata.skipDescendantDocsForSpecificAspects=false
 #alfresco.metadata.ignore.aspect.0=

+# If you are experiencing OOM errors, your Paths information is probably too large to be
+# retrieved for node batches. You can change this property to "false" in order to get
+# paths information only for single nodes. Be aware that when using "false", memory requirements
+# are lower but indexing performance is also slower.
+alfresco.metadata.getPathsInNodeBatches=true
+
 # Date/Datetime fields only: if this property is set to true (default value) each date/datetime field
 #
 # - will be indexed as a whole value