SEARCH-1001: Add javadoc

Author: Joel
Date: 2018-07-30 12:53:06 -04:00
parent 18689904a0
commit dc94c779b0
8 changed files with 159 additions and 5 deletions


@@ -1439,7 +1439,23 @@ public class SolrInformationServer implements InformationServer
/*
* Choose the max between the last commit time in the index and the last time the tracker started.
* Hole retention is applied to both.
*
* This logic is very tricky and very important to understand.
*
* state.getLastGoodTxCommitTimeInIndex() is used to determine where to start pulling transactions from the repo on the
* current tracker run.
*
* If we simply take the current value of state.getLastIndexedTxCommitTime() we have the following problem:
*
* If no data is added to the repo for a long period of time, state.getLastIndexedTxCommitTime() never moves forward. This causes the
* loop inside MetadataTracker.getSomeTransactions() to hammer the repo as the gap between state.getLastIndexedTxCommitTime()
* and state.getTimeToStopIndexing() increases.
*
* To resolve this we choose the max between the last commit time in the index and the last time the tracker started. In theory
* if we start looking for transactions after the last tracker was started (and apply hole retention), we should never miss a
* transaction. Or at least ensure that the principle behind hole retention is respected. This theory should be closely looked at if
* the trackers ever lose data.
*/
timeBeforeWhichThereCanBeNoTxHolesInIndex = Math.max(timeBeforeWhichThereCanBeNoTxHolesInIndex, lastStartTimeWhichThereCanBeNoTxHolesInIndex);
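
A minimal, self-contained sketch of the calculation described above (the constant and timestamps are hypothetical; the real values come from the tracker state and configuration):

public class HoleRetentionSketch
{
    // Hypothetical value; hole retention is configurable in the real trackers.
    static final long HOLE_RETENTION_MS = 3_600_000L; // one hour

    public static void main(String[] args)
    {
        long lastCommitTimeInIndex = 1_532_900_000_000L; // last good tx commit time in the index
        long lastTrackerStartTime  = 1_532_990_000_000L; // last time the tracker started

        // Apply hole retention to both candidates, then take the max so the starting
        // point keeps moving forward even when no data is added to the repo.
        long timeBeforeWhichThereCanBeNoTxHolesInIndex =
                Math.max(lastCommitTimeInIndex - HOLE_RETENTION_MS,
                         lastTrackerStartTime - HOLE_RETENTION_MS);

        System.out.println("Start pulling transactions from: " + timeBeforeWhichThereCanBeNoTxHolesInIndex);
    }
}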


@@ -82,15 +82,29 @@ public abstract class AbstractAuthoritySetQuery extends Query
return authorities.hashCode();
}
/*
* This method collects the set of ACLIDs from the ACL records that match the authorities.
*/
protected HybridBitSet getACLSet(String[] auths, String field, SolrIndexSearcher searcher) throws IOException
{
/*
* Build a query that matches the authorities with a field in the ACL records in the index.
*/
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
for(String current : auths)
{
queryBuilder.add(new TermQuery(new Term(field, current)), BooleanClause.Occur.SHOULD);
}
/*
* Collect a docset containing the ACL records that match the query.
* This query will be in the filter cache. Ideally it would remain cached throughout the user's session.
*/
DocSet docSet = searcher.getDocSet(queryBuilder.build());
DocIterator iterator = docSet.iterator();
@@ -102,6 +116,12 @@ public abstract class AbstractAuthoritySetQuery extends Query
//TODO : make this configurable. For some systems this is huge and for others not big enough.
HybridBitSet hybridBitSet = new HybridBitSet(60000000);
/*
* Collect the ACLIDs from the matching ACL records.
* This is done in a separate step so the initial ACL query can be cached in the FilterCache.
* The initial ACL query may be expensive if the number of authorities is very large.
*/
List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
LeafReaderContext context = leaves.get(0);
NumericDocValues aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, context.reader());
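
A rough sketch of the two-step pattern described above, with plain Java collections standing in for Solr's DocSet and the ACLID doc values (all names and values are hypothetical): the cached ACL query yields matching doc ids, and a second pass maps each doc to its ACLID.

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class AclSetSketch
{
    public static void main(String[] args)
    {
        // Stand-ins: docs matched by the cached ACL query, and a doc -> ACLID lookup
        // playing the role of the FIELD_ACLID doc values.
        int[] matchingAclDocs = {3, 7, 42};
        Map<Integer, Long> docToAclId = Map.of(3, 101L, 7, 102L, 42, 101L);

        // Second step: collect the ACLIDs for the matching ACL records.
        Set<Long> aclIds = new HashSet<>();
        for (int doc : matchingAclDocs)
        {
            aclIds.add(docToAclId.get(doc));
        }
        System.out.println("ACLIDs readable by the given authorities: " + aclIds);
    }
}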


@@ -81,8 +81,9 @@ public class AlfrescoFTSQParserPlugin extends QParserPlugin
rerankPhase = RerankPhase.valueOf(arg.toString());
}
/*
* First check the "alfresco.postfilter" System property, then solrcore.properties;
* this turns on the postFilter.
*/
postfilter = Boolean.parseBoolean(System.getProperty("alfresco.postfilter",
req.getCore().getCoreDescriptor().getCoreProperty("alfresco.postfilter",
@@ -109,7 +110,11 @@ public class AlfrescoFTSQParserPlugin extends QParserPlugin
if(authset && postfilter)
{
/*
* Return the PostFilter.
* The cost of 200 turns on the postfilter inside Solr.
* The PostFilterQuery pulls out all the post filters in the
* query and applies them.
*/
return new PostFilterQuery(200, query);
}
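
For context, Solr only runs a query as a post filter when it implements the PostFilter interface, is marked non-cacheable, and has a cost of at least 100; that is what the cost of 200 relies on. A minimal sketch of that contract (a toy filter, not Alfresco's PostFilterQuery):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.solr.search.DelegatingCollector;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.PostFilter;

public class ToyPostFilter extends ExtendedQueryBase implements PostFilter
{
    public ToyPostFilter()
    {
        setCache(false); // post filters must not be cached
        setCost(200);    // cost >= 100 tells Solr to run this as a post filter
    }

    @Override
    public DelegatingCollector getFilterCollector(IndexSearcher searcher)
    {
        return new DelegatingCollector()
        {
            @Override
            public void collect(int doc) throws IOException
            {
                if (doc % 2 == 0) // toy rule; the ACL checks go here in the real collectors
                {
                    super.collect(doc); // only delegate documents that pass the filter
                }
            }
        };
    }
}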


@@ -563,9 +563,17 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
return createReaderSetQuery(queryText);
} else if (field.equals(FIELD_AUTHORITY))
{
/*
* ACL DOCUMENTATION STARTS HERE
* This creates the query that applies the ACL filter
*/
return createAuthorityQuery(queryText);
} else if (field.equals(FIELD_AUTHORITYSET))
{
/*
* ACL DOCUMENTATION STARTS HERE
* This creates the query that applies the ACL filter for an authority set
*/
return createAuthoritySetQuery(queryText);
} else if (field.equals(FIELD_DENIED))
{


@@ -108,6 +108,21 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
}
}
/*
* ACL PostFilter
*
* The getFilterCollector function returns a DelegatingCollector
* which is used to filter the documents that match the query.
*
* A delegating collector wraps the TopDocs Collector which gathers the top documents that
* match a query. A delegating collector can filter the documents before "delegating" to the TopDocs
* collector. This filtering process is where the ACL logic is applied.
*
* The getFilterCollector method sets up the data structures needed to apply the ACL rules.
* These data structures are then passed to the access control collectors.
*
*/
public DelegatingCollector getFilterCollector(IndexSearcher searcher)
{
@@ -136,7 +151,19 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
try
{
/*
* Collect the ACLIDs that match the authorities.
* This is done by querying the ACL records in the index. See the method for more
* documentation on this query.
*/
HybridBitSet aclSet = getACLSet(auths, QueryConstants.FIELD_READER, solrIndexSearcher);
/*
* Collect the documents that the user owns.
*/
BitsFilter ownerFilter = getOwnerFilter(auths, solrIndexSearcher);
if (globalReaders.contains(PermissionService.OWNER_AUTHORITY))
@@ -251,6 +278,11 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
}
}
/*
* The AccessControlCollector applies the ACL logic given the aclIds and ownerFilter
*/
class AccessControlCollector extends DelegatingCollector
{
private HybridBitSet aclIds;
@@ -276,6 +308,12 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
this.ownerDocs = ownerFilter.getBitSets().get(context.ord);
}
/*
* The collect method is applied to each document that matches the
* query. The document's aclId must be in the set of aclIds passed into the collector,
* or the document's id must be in the ownerDocs.
*/
public void collect(int doc) throws IOException
{
long aclId = this.fieldValues.get(doc);
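
A compact, plain-Java sketch of the collect() rule just described (java.util sets standing in for the HybridBitSet and owner bitsets; all values hypothetical):

import java.util.Set;

public class CollectRuleSketch
{
    public static void main(String[] args)
    {
        Set<Long> aclIds = Set.of(101L, 102L);       // ACLIDs the user's authorities can read
        Set<Integer> ownerDocs = Set.of(3);          // documents the user owns
        long[] docAclId = {100L, 101L, 102L, 100L};  // per-document ACLID values

        for (int doc = 0; doc < docAclId.length; doc++)
        {
            // Keep the document if its aclId is readable OR the user owns it.
            if (aclIds.contains(docAclId[doc]) || ownerDocs.contains(doc))
            {
                System.out.println("collect doc " + doc);
            }
        }
    }
}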


@@ -180,6 +180,11 @@ public abstract class AbstractTracker implements Tracker
try
{
/*
* The runLock ensures that for each tracker type (metadata, content, commit, cascade) only one tracker will
* be running at a time.
*/
runLock.acquire();
if(state==null && Boolean.parseBoolean(System.getProperty("alfresco.test", "false")))
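
A minimal sketch of the one-run-at-a-time pattern (assuming runLock is a binary java.util.concurrent.Semaphore, which matches the acquire() call above):

import java.util.concurrent.Semaphore;

public class RunLockSketch
{
    // One permit per tracker type: only one run of this tracker can be active.
    private final Semaphore runLock = new Semaphore(1);

    public void track() throws InterruptedException
    {
        runLock.acquire(); // blocks while another run of this tracker is in progress
        try
        {
            // ... do one tracker run ...
        }
        finally
        {
            runLock.release(); // always release so the next run can proceed
        }
    }
}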


@@ -131,6 +131,18 @@ public class CommitTracker extends AbstractTracker
//See if we need a rollback
if(metadataTracker.getRollback() || aclTracker.getRollback()) {
/*
* The metadataTracker and aclTracker will return true if an unhandled exception has occurred during indexing.
*
* The doRollback method rolls the index back to the state that it was in at the last commit. This will undo
* all the work that has been done by other trackers after the last commit.
*
* The state of the other trackers is then set to null so the trackers will initialize their state from
* the index, rather than the in-memory state. This keeps the trackers in sync with the index if their work is
* rolled back.
*/
doRollback();
return;
}
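
A condensed sketch of that flow (hypothetical fields and method bodies; the real doRollback also rolls the Solr index itself back to the last commit):

public class RollbackSketch
{
    private boolean metadataRollback; // set when an unhandled indexing exception occurs
    private boolean aclRollback;
    private Object metadataTrackerState = new Object();
    private Object aclTrackerState = new Object();

    void maybeRollback()
    {
        if (metadataRollback || aclRollback)
        {
            rollbackIndexToLastCommit();  // undo all tracker work since the last commit
            metadataTrackerState = null;  // trackers will re-initialize their state from
            aclTrackerState = null;       // the index, not from stale in-memory state
        }
    }

    private void rollbackIndexToLastCommit()
    {
        // stand-in for the real index rollback (e.g. Lucene's IndexWriter.rollback())
    }
}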


@@ -143,6 +143,15 @@ public class MetadataTracker extends AbstractTracker implements Tracker
if(!isMaster && isSlave)
{
// Dynamic registration
/*
* This section allows Solr's master/slave setup to be used with dynamic shard registration.
* In this scenario the slave is polling a "tracking" Solr node. The code below calls
* the repo to register the state of the node without pulling any real transactions from the repo.
*
* This allows the repo to register the replica so that it will be included in queries. But the slave Solr node
* will pull its data from a "tracking" Solr node using Solr's master/slave replication, rather than tracking the repository.
*
*/
ShardState shardstate = getShardState();
client.getTransactions(0L, null, 0L, null, 0, shardstate);
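
A hedged sketch of this "registration-only" poll (the interface below is a hypothetical mirror of the client call above): zero-valued arguments request no transactions, while the trailing shard state still registers the node with the repo.

public class RegistrationSketch
{
    interface RepoClient
    {
        // Hypothetical mirror of client.getTransactions(...): the trailing argument
        // ships the shard's state to the repo on every call.
        void getTransactions(Long fromTxnId, Long fromCommitTime, Long toTxnId,
                             Long toCommitTime, int maxResults, Object shardState);
    }

    static void register(RepoClient client, Object shardState)
    {
        // Ask for zero transactions; the call exists purely to register shardState.
        client.getTransactions(0L, null, 0L, null, 0, shardState);
    }
}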
@@ -610,6 +619,11 @@ public class MetadataTracker extends AbstractTracker implements Tracker
{
try
{
/*
* This write lock is used to lock out the Commit Tracker. This ensures that the MetadataTracker will
* not be indexing content while commits or rollbacks are occurring.
*/
getWriteLock().acquire();
/*
@@ -621,6 +635,26 @@ public class MetadataTracker extends AbstractTracker implements Tracker
this.state = getTrackerState();
/*
* The fromCommitTime tells getSomeTransactions() where to start; this is actually fairly straightforward.
*
* What makes this code so tricky to understand is the state.getTimeToStopIndexing().
*
* There are two scenarios to keep in mind:
*
* 1) Full re-index: In this scenario the state.getTimeToStopIndexing() will never stop the indexing.
*
* 2) Up-to-date indexing: This is where state.getTimeToStopIndexing() gets interesting. In this scenario
* the Solr index is already up to date with the repo and it is tracking new transactions. The state.getTimeToStopIndexing()
* in this scenario causes the getSomeTransactions() call to stop returning results if it finds a transaction
* beyond a specific point in time. This will break out of this loop and end the tracker run.
*
* The next time the metadata tracker runs, the "continueState()" method applies the "hole retention"
* to state.getLastGoodTxCommitTimeInIndex(). This moves state.getLastGoodTxCommitTimeInIndex() back so the tracker
* re-scans for prior transactions that might have been missed.
*
*/
Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex());
transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000,
state.getTimeToStopIndexing());
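
A rough sketch of the loop shape described above (hypothetical repo interface; not the real getSomeTransactions):

public class PullLoopSketch
{
    interface Repo
    {
        long[] pullTransactions(long fromCommitTime, long toCommitTime);
    }

    static void trackerRun(Repo repo, long fromCommitTime, long timeToStopIndexing)
    {
        final long step = 3_600_000L; // one-hour windows, like TIME_STEP_1_HR_IN_MS
        long from = fromCommitTime;

        // Scenario 1 (full re-index): timeToStopIndexing is effectively never reached.
        // Scenario 2 (up-to-date index): the loop ends once transactions pass the stop
        // time, which ends the tracker run.
        while (from < timeToStopIndexing)
        {
            long to = Math.min(from + step, timeToStopIndexing);
            long[] txns = repo.pullTransactions(from, to);
            // ... index txns ...
            from = to;
        }
    }
}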
@@ -640,6 +674,22 @@ public class MetadataTracker extends AbstractTracker implements Tracker
ArrayList<Transaction> txBatch = new ArrayList<>();
for (Transaction info : transactions.getTransactions()) {
/*
* isInIndex is used to ensure transactions that are being re-pulled due to "hole retention" are not re-indexed if
* they have already been indexed.
*
* The logic in infoSrv.txnInIndex() first checks an in-memory LRU cache for the txnId. If it doesn't find it in the cache,
* it checks the index. The LRU cache is only needed for txnIds that have been indexed but are not yet visible in the index for
* one of two reasons:
*
* 1) The commit tracker has not yet committed the transaction.
* 2) The txnId has been committed to the index but the new searcher has not yet been warmed.
*
* This means that to ensure txnIds are not needlessly reprocessed during hole retention, the LRU cache must be large
* enough to cover the time between when a txnId is indexed and when it becomes visible.
*/
boolean isInIndex = (infoSrv.txnInIndex(info.getId(), true) && info.getCommitTimeMs() <= state.getLastIndexedTxCommitTime());
if (isInIndex) {
txnsFound.add(info);
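
A sketch of the two-level check described above, with a LinkedHashMap standing in for the LRU cache (the size and helper names are hypothetical; the real logic lives in infoSrv.txnInIndex()):

import java.util.LinkedHashMap;
import java.util.Map;

public class TxnInIndexSketch
{
    static final int MAX_SIZE = 250_000; // must cover the indexed-to-visible window

    // Access-ordered LinkedHashMap acting as an LRU cache of recently indexed txnIds.
    static final Map<Long, Boolean> RECENT_TXNS =
            new LinkedHashMap<Long, Boolean>(16, 0.75f, true)
            {
                @Override
                protected boolean removeEldestEntry(Map.Entry<Long, Boolean> eldest)
                {
                    return size() > MAX_SIZE;
                }
            };

    static boolean txnInIndex(long txnId)
    {
        if (RECENT_TXNS.containsKey(txnId))
        {
            return true; // indexed, but possibly not yet visible to searchers
        }
        return txnVisibleInIndex(txnId); // fall back to checking the index itself
    }

    static boolean txnVisibleInIndex(long txnId)
    {
        return false; // stand-in for the real index lookup
    }
}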