From dc94c779b0e2fef43745e3b76d30d42bb100ab5c Mon Sep 17 00:00:00 2001 From: Joel Date: Mon, 30 Jul 2018 12:53:06 -0400 Subject: [PATCH] SEARCH-1001: Add javadoc --- .../alfresco/solr/SolrInformationServer.java | 18 ++++++- .../solr/query/AbstractAuthoritySetQuery.java | 22 +++++++- .../solr/query/AlfrescoFTSQParserPlugin.java | 11 ++-- .../alfresco/solr/query/Solr4QueryParser.java | 8 +++ .../solr/query/SolrAuthoritySetQuery.java | 38 ++++++++++++++ .../solr/tracker/AbstractTracker.java | 5 ++ .../alfresco/solr/tracker/CommitTracker.java | 12 +++++ .../solr/tracker/MetadataTracker.java | 50 +++++++++++++++++++ 8 files changed, 159 insertions(+), 5 deletions(-) diff --git a/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index 04ee580be..651477408 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -1439,7 +1439,23 @@ public class SolrInformationServer implements InformationServer /* * Choose the max between the last commit time in the index and the last time the tracker started. - * Hole retention is applied to both. * + * Hole retention is applied to both. + * + * This logic is very tricky and very important to understand. + * + * state.getLastGoodTxCommitTimeInIndex() is used to determine where to start pulling transactions from the repo on the + * current tracker run. + * + * If we simply take the current value of state.getLastIndexedTxCommitTime() we have the following problem: + * + * If no data is added to the repo for a long period of time state.getLastIndexedTxCommitTime() never moves forward. This causes the + * loop inside MetadataTracker.getSomeTransactions() to hammer the repo as the time between state.getLastIndexedTxCommitTime() + * and state.getTimeToStopIndexing() increases.
+ * + * To resolve this we choose the max between the last commit time in the index and the last time the tracker started. In theory + * if we start looking for transactions after the last tracker was started (and apply hole retention), we should never miss a + * transaction. Or at least ensure that the principle behind hole retention is respected. This theory should be closely looked at if + * the trackers ever lose data. */ timeBeforeWhichThereCanBeNoTxHolesInIndex = Math.max(timeBeforeWhichThereCanBeNoTxHolesInIndex, lastStartTimeWhichThereCanBeNoTxHolesInIndex); diff --git a/alfresco-search/src/main/java/org/alfresco/solr/query/AbstractAuthoritySetQuery.java b/alfresco-search/src/main/java/org/alfresco/solr/query/AbstractAuthoritySetQuery.java index 8893a81bc..8cff300f2 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/query/AbstractAuthoritySetQuery.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/query/AbstractAuthoritySetQuery.java @@ -82,15 +82,29 @@ public abstract class AbstractAuthoritySetQuery extends Query return authorities.hashCode(); } + + /* + * This method collects the bitset of documents that match the authorities. + */ + protected HybridBitSet getACLSet(String[] auths, String field, SolrIndexSearcher searcher) throws IOException { + /* + * Build a query that matches the authorities with a field in the ACL records in the index. + */ + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); for(String current : auths) { queryBuilder.add(new TermQuery(new Term(field, current)), BooleanClause.Occur.SHOULD); } - //NOTE: this query will be in the filter cache. Ideally it would remain cached throughout the users session. + + /* + * Collect a docset containing the ACL records that match the query. + * This query will be in the filter cache. Ideally it would remain cached throughout the user's session.
+ */ + DocSet docSet = searcher.getDocSet(queryBuilder.build()); DocIterator iterator = docSet.iterator(); @@ -102,6 +116,12 @@ public abstract class AbstractAuthoritySetQuery extends Query //TODO : makes this configurable. For some systems this is huge and for others not big enough. HybridBitSet hybridBitSet = new HybridBitSet(60000000); + /* + * Collect the ACLID's from the matching acl records. + * This is done in a separate step so the initial ACL query can be cached in the FilterCache. + * The initial ACL query may be expensive if the number of authorities is very large. + */ + List leaves = searcher.getTopReaderContext().leaves(); LeafReaderContext context = leaves.get(0); NumericDocValues aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, context.reader()); diff --git a/alfresco-search/src/main/java/org/alfresco/solr/query/AlfrescoFTSQParserPlugin.java b/alfresco-search/src/main/java/org/alfresco/solr/query/AlfrescoFTSQParserPlugin.java index bde1f1f95..45724f381 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/query/AlfrescoFTSQParserPlugin.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/query/AlfrescoFTSQParserPlugin.java @@ -81,8 +81,9 @@ public class AlfrescoFTSQParserPlugin extends QParserPlugin rerankPhase = RerankPhase.valueOf(arg.toString()); } - //First check the System property. - //Then check solrcore.properties, defaulting to the postFilter. + /* + * First check the System property, then solrcore.properties, to determine whether the postFilter is enabled. + */ postfilter = Boolean.parseBoolean(System.getProperty("alfresco.postfilter", req.getCore().getCoreDescriptor().getCoreProperty("alfresco.postfilter", @@ -109,7 +110,11 @@ public class AlfrescoFTSQParserPlugin extends QParserPlugin if(authset && postfilter) { - //Return the PostFilter + /* + * The cost of 200 turns on the postfilter inside Solr + * The postfilter query pulls out all the post filters in the + * query and applies them.
+ */ return new PostFilterQuery(200, query); } diff --git a/alfresco-search/src/main/java/org/alfresco/solr/query/Solr4QueryParser.java b/alfresco-search/src/main/java/org/alfresco/solr/query/Solr4QueryParser.java index fbe72dafb..a5b3f8a55 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/query/Solr4QueryParser.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/query/Solr4QueryParser.java @@ -563,9 +563,17 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants return createReaderSetQuery(queryText); } else if (field.equals(FIELD_AUTHORITY)) { + /* + * ACL DOCUMENTATION STARTS HERE + * This creates the query that applies the ACL filter + */ return createAuthorityQuery(queryText); } else if (field.equals(FIELD_AUTHORITYSET)) { + /* + * ACL DOCUMENTATION STARTS HERE + * This creates the query that applies the ACL filter + */ return createAuthoritySetQuery(queryText); } else if (field.equals(FIELD_DENIED)) { diff --git a/alfresco-search/src/main/java/org/alfresco/solr/query/SolrAuthoritySetQuery.java b/alfresco-search/src/main/java/org/alfresco/solr/query/SolrAuthoritySetQuery.java index 31d7bced1..e4d5c9cca 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/query/SolrAuthoritySetQuery.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/query/SolrAuthoritySetQuery.java @@ -108,6 +108,21 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements } } + /* + * ACL PostFilter + * + * The getFilterCollector function returns a DelegatingCollector + * which is used to filter the documents that match the query. + * + * A delegating collector wraps the TopDocs Collector which gathers the top documents that + * match a query. A delegating collector can filter the documents before "delegating" to the TopDocs + * collector. This filtering process is where the ACL logic is applied. + * + * The getFilterCollector method sets up the data structures needed to apply the ACL rules.
+ * These data structures are then passed to the access control collectors. + * + */ + public DelegatingCollector getFilterCollector(IndexSearcher searcher) { @@ -136,7 +151,19 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements try { + + /* + * Collect the ACLID's that match the authorities. + * This is done by querying the ACL records in the index. See the method for more + * documentation on this query. + */ + HybridBitSet aclSet = getACLSet(auths, QueryConstants.FIELD_READER, solrIndexSearcher); + + /* + * Collect the documents that the user owns. + */ + BitsFilter ownerFilter = getOwnerFilter(auths, solrIndexSearcher); if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) @@ -251,6 +278,11 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements } } + + /* + * The AccessControlCollector applies the ACL logic given aclIds and ownerFilter + */ + class AccessControlCollector extends DelegatingCollector { private HybridBitSet aclIds; @@ -276,6 +308,12 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements this.ownerDocs = ownerFilter.getBitSets().get(context.ord); } + /* + * The collect method is applied to each document that matches the + * query. The document's aclId must be in the set of aclId's passed into the collector, + * or the document's id must be in the ownerDocs.
+ */ + public void collect(int doc) throws IOException { long aclId = this.fieldValues.get(doc); diff --git a/alfresco-search/src/main/java/org/alfresco/solr/tracker/AbstractTracker.java b/alfresco-search/src/main/java/org/alfresco/solr/tracker/AbstractTracker.java index d55fc274c..7f38e7f38 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/tracker/AbstractTracker.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/tracker/AbstractTracker.java @@ -180,6 +180,11 @@ public abstract class AbstractTracker implements Tracker try { + /* + * The runLock ensures that for each tracker type (metadata, content, commit, cascade) only one tracker will + * be running at a time. + */ + runLock.acquire(); if(state==null && Boolean.parseBoolean(System.getProperty("alfresco.test", "false"))) diff --git a/alfresco-search/src/main/java/org/alfresco/solr/tracker/CommitTracker.java b/alfresco-search/src/main/java/org/alfresco/solr/tracker/CommitTracker.java index ba0c2b6ff..3acbd4a51 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/tracker/CommitTracker.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/tracker/CommitTracker.java @@ -131,6 +131,18 @@ public class CommitTracker extends AbstractTracker //See if we need a rollback if(metadataTracker.getRollback() || aclTracker.getRollback()) { + + /* + * The metadataTracker and aclTracker will return true if an unhandled exception has occurred during indexing. + * + * The doRollback method rolls the index back to the state that it was in at the last commit. This will undo + * all the work that has been done by other trackers after the last commit. + * + * The state of the other trackers is then set to null so the trackers will initialize their state from + * the index, rather than the in-memory state. This keeps the trackers in sync with the index if their work is + * rolled back.
+ */ + doRollback(); return; } diff --git a/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java index 824d47c00..d52c7bf24 100644 --- a/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java +++ b/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java @@ -143,6 +143,15 @@ public class MetadataTracker extends AbstractTracker implements Tracker if(!isMaster && isSlave) { // Dynamic registration + /* + * This section allows Solr's master/slave setup to be used with dynamic shard registration. + * In this scenario the slave is polling a "tracking" Solr node. The code below calls + * the repo to register the state of the node without pulling any real transactions from the repo. + * + * This allows the repo to register the replica so that it will be included in queries. But the slave Solr node + * will pull its data from a "tracking" Solr node using Solr's master/slave replication, rather than tracking the repository. + * + */ ShardState shardstate = getShardState(); client.getTransactions(0L, null, 0L, null, 0, shardstate); @@ -610,6 +619,11 @@ public class MetadataTracker extends AbstractTracker implements Tracker { try { + /* + * This write lock is used to lock out the Commit Tracker. This ensures that the MetadataTracker will + * not be indexing content while commits or rollbacks are occurring. + */ + getWriteLock().acquire(); /* @@ -621,6 +635,26 @@ public class MetadataTracker extends AbstractTracker implements Tracker this.state = getTrackerState(); + /* + * The fromCommitTime tells getSomeTransactions() where to start; this is actually fairly straightforward. + * + * What makes this code so tricky to understand is the state.getTimeToStopIndexing(). + * + * There are two scenarios to keep in mind: + * + * 1) Full re-index: In this scenario the state.getTimeToStopIndexing() will never stop the indexing.
+ * + * 2) Up-to-date indexing: This is where state.getTimeToStopIndexing() gets interesting. In this scenario + * the Solr index is already up to date with the repo and it is tracking new transactions. The state.getTimeToStopIndexing() + * in this scenario causes the getSomeTransactions() call to stop returning results if it finds a transaction + * beyond a specific point in time. This will break out of this loop and end the tracker run. + * + * The next time the metadata tracker runs the "continueState()" method applies the "hole retention" + * to state.getLastGoodTxCommitTimeInIndex(). This causes the state.getLastGoodTxCommitTimeInIndex() to scan + * for prior transactions that might have been missed. + * + */ + Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex()); transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000, state.getTimeToStopIndexing()); @@ -640,6 +674,22 @@ public class MetadataTracker extends AbstractTracker implements Tracker ArrayList txBatch = new ArrayList<>(); for (Transaction info : transactions.getTransactions()) { + + /* + * isInIndex is used to ensure transactions that are being re-pulled due to "hole retention" are not re-indexed if + * they have already been indexed. + * + * The logic in infoSrv.txnInIndex() first checks an in-memory LRUcache for the txnId. If it doesn't find it in the cache + * it checks the index. The LRUCache is only needed for txnId's that have been indexed but are not yet visible in the index for + * one of two reasons: + * + * 1) The commit tracker has not yet committed the transaction. + * 2) The txnId has been committed to the index but the new searcher has not yet been warmed. + * + * This means that to ensure txnId's are not needlessly reprocessed during hole retention, the LRUCache must be large + * enough to cover the time between when a txnId is indexed and when it becomes visible. 
+ */ + boolean isInIndex = (infoSrv.txnInIndex(info.getId(), true) && info.getCommitTimeMs() <= state.getLastIndexedTxCommitTime()); if (isInIndex) { txnsFound.add(info);