SEARCH-1001: Add javadoc
@@ -1439,7 +1439,23 @@ public class SolrInformationServer implements InformationServer
        /*
         * Choose the max between the last commit time in the index and the last time the tracker started.
         * Hole retention is applied to both.
         *
         * This logic is very tricky and very important to understand.
         *
         * state.getLastGoodTxCommitTimeInIndex() is used to determine where to start pulling transactions from the repo on the
         * current tracker run.
         *
         * If we simply take the current value of state.getLastIndexedTxCommitTime() we have the following problem:
         *
         * If no data is added to the repo for a long period of time, state.getLastIndexedTxCommitTime() never moves forward. This causes the
         * loop inside MetadataTracker.getSomeTransactions() to hammer the repo as the time between state.getLastIndexedTxCommitTime()
         * and state.setTimeToStopIndexing increases.
         *
         * To resolve this we choose the max between the last commit time in the index and the last time the tracker started. In theory,
         * if we start looking for transactions after the last tracker start (and apply hole retention), we should never miss a
         * transaction, or at least the principle behind hole retention is respected. This theory should be closely looked at if
         * the trackers ever lose data.
         */

        timeBeforeWhichThereCanBeNoTxHolesInIndex = Math.max(timeBeforeWhichThereCanBeNoTxHolesInIndex, lastStartTimeWhichThereCanBeNoTxHolesInIndex);
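To make the window computation concrete, here is a minimal, self-contained sketch; the names startPullingFrom and holeRetentionMs are hypothetical and not part of the Alfresco code:

    // Illustrative sketch of the rule described above: start from the later of
    // the last indexed commit time and the last tracker start time, stepped
    // back by the hole retention period so out-of-order commits are not missed.
    public class TrackingWindowSketch
    {
        static long startPullingFrom(long lastTxCommitTimeInIndex,
                                     long lastTrackerStartTime,
                                     long holeRetentionMs)
        {
            // An idle repo leaves lastTxCommitTimeInIndex far in the past; taking
            // the max with the tracker start time keeps the scan window small.
            long noHolesBefore = Math.max(lastTxCommitTimeInIndex, lastTrackerStartTime);
            return noHolesBefore - holeRetentionMs;
        }

        public static void main(String[] args)
        {
            long now = System.currentTimeMillis();
            // Repo idle for an hour, tracker restarted a minute ago, 5 minutes of hole retention:
            System.out.println(startPullingFrom(now - 3_600_000L, now - 60_000L, 300_000L));
        }
    }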
@@ -82,15 +82,29 @@ public abstract class AbstractAuthoritySetQuery extends Query
        return authorities.hashCode();
    }

    /*
     * This method collects the bitset of ACL IDs for the ACL records that match the authorities.
     */
    protected HybridBitSet getACLSet(String[] auths, String field, SolrIndexSearcher searcher) throws IOException
    {
        /*
         * Build a query that matches the authorities with a field in the ACL records in the index.
         */
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        for(String current : auths)
        {
            queryBuilder.add(new TermQuery(new Term(field, current)), BooleanClause.Occur.SHOULD);
        }

        /*
         * Collect a docset containing the ACL records that match the query.
         * This query will be in the filter cache. Ideally it would remain cached throughout the user's session.
         */
        DocSet docSet = searcher.getDocSet(queryBuilder.build());

        DocIterator iterator = docSet.iterator();
@@ -102,6 +116,12 @@ public abstract class AbstractAuthoritySetQuery extends Query
        //TODO : make this configurable. For some systems this is huge and for others not big enough.
        HybridBitSet hybridBitSet = new HybridBitSet(60000000);

        /*
         * Collect the ACL IDs from the matching ACL records.
         * This is done in a separate step so the initial ACL query can be cached in the FilterCache.
         * The initial ACL query may be expensive if the number of authorities is very large.
         */
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        LeafReaderContext context = leaves.get(0);
        NumericDocValues aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, context.reader());
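As a hedged sketch of the collection step that follows (the diff truncates before it), the loop below walks the cached ACL doc set and gathers each record's ACL ID. java.util.HashSet stands in for Alfresco's HybridBitSet, and the Lucene 7+ NumericDocValues iterator API (advanceExact/longValue) is assumed, which may differ from the Lucene version this code was written against:

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;
    import org.apache.lucene.index.NumericDocValues;
    import org.apache.solr.search.DocIterator;
    import org.apache.solr.search.DocSet;

    class AclIdCollectorSketch
    {
        // Assumes a single-segment index for brevity; the real code walks the
        // leaves (segments) and resolves doc values per segment.
        static Set<Long> collectAclIds(DocSet aclDocs, NumericDocValues aclValues) throws IOException
        {
            Set<Long> aclIds = new HashSet<>();
            DocIterator it = aclDocs.iterator();
            while (it.hasNext())
            {
                int doc = it.nextDoc();
                // Read the ACLID doc value for each matching ACL record.
                if (aclValues.advanceExact(doc))
                {
                    aclIds.add(aclValues.longValue());
                }
            }
            return aclIds;
        }
    }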
@@ -81,8 +81,9 @@ public class AlfrescoFTSQParserPlugin extends QParserPlugin
            rerankPhase = RerankPhase.valueOf(arg.toString());
        }

        /*
         * This turns on the postFilter.
         * First check the System property, then check solrcore.properties, defaulting to the postFilter.
         */
        postfilter = Boolean.parseBoolean(System.getProperty("alfresco.postfilter",
                     req.getCore().getCoreDescriptor().getCoreProperty("alfresco.postfilter",
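A minimal sketch of this layered lookup, assuming a hypothetical default of "true" (the actual default is truncated in this diff): the JVM system property wins, then the solrcore.properties value, then the default.

    import java.util.Properties;

    class PostFilterConfigSketch
    {
        static boolean resolvePostFilter(Properties solrcoreProps)
        {
            // solrcore.properties value, falling back to a (hypothetical) default of "true"...
            String fromCore = solrcoreProps.getProperty("alfresco.postfilter", "true");
            // ...overridden by -Dalfresco.postfilter=... when the system property is set.
            return Boolean.parseBoolean(System.getProperty("alfresco.postfilter", fromCore));
        }

        public static void main(String[] args)
        {
            Properties props = new Properties();
            props.setProperty("alfresco.postfilter", "false");
            System.out.println(resolvePostFilter(props)); // false unless the system property overrides
        }
    }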
@@ -109,7 +110,11 @@ public class AlfrescoFTSQParserPlugin extends QParserPlugin

        if(authset && postfilter)
        {
            /*
             * Return the PostFilter.
             * The cost of 200 turns on the postfilter inside Solr.
             * The postfilter query pulls out all the post filters in the query and applies them.
             */
            return new PostFilterQuery(200, query);
        }
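For context, a hedged sketch of the contract the cost of 200 relies on. In stock Solr, a filter query is executed as a post filter when it implements PostFilter, is marked non-cacheable, and reports a cost of at least 100; the check below paraphrases that rule and is not code from this repository:

    import org.apache.lucene.search.Query;
    import org.apache.solr.search.PostFilter;

    class PostFilterCheckSketch
    {
        // Paraphrase of Solr's decision: run q after the main query, as a
        // DelegatingCollector, only when all three conditions hold.
        static boolean runsAsPostFilter(Query q)
        {
            if (!(q instanceof PostFilter))
            {
                return false;
            }
            PostFilter pf = (PostFilter) q;
            return !pf.getCache() && pf.getCost() >= 100;
        }
    }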
@@ -563,9 +563,17 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
            return createReaderSetQuery(queryText);
        } else if (field.equals(FIELD_AUTHORITY))
        {
            /*
             * ACL DOCUMENTATION STARTS HERE
             * This creates the query that applies the ACL filter
             */
            return createAuthorityQuery(queryText);
        } else if (field.equals(FIELD_AUTHORITYSET))
        {
            /*
             * ACL DOCUMENTATION STARTS HERE
             * This creates the query that applies the ACL filter
             */
            return createAuthoritySetQuery(queryText);
        } else if (field.equals(FIELD_DENIED))
        {
@@ -108,6 +108,21 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
        }
    }

    /*
     * ACL PostFilter
     *
     * The getFilterCollector function returns a DelegatingCollector
     * which is used to filter the documents that match the query.
     *
     * A delegating collector wraps the TopDocs Collector which gathers the top documents that
     * match a query. A delegating collector can filter the documents before "delegating" to the TopDocs
     * collector. This filtering process is where the ACL logic is applied.
     *
     * The getFilterCollector method sets up the data structures needed to apply the ACL rules.
     * These data structures are then passed to the access control collectors.
     */
    public DelegatingCollector getFilterCollector(IndexSearcher searcher)
    {
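A hedged skeleton of the pattern this comment describes, assuming Solr's DelegatingCollector API from the 6-8 era; the real collector here is the AccessControlCollector shown later in this diff:

    import java.io.IOException;
    import org.apache.solr.search.DelegatingCollector;

    class PassThroughCollectorSketch extends DelegatingCollector
    {
        @Override
        public void collect(int doc) throws IOException
        {
            // An ACL check would go here; only documents that pass are handed
            // on (via super.collect) to the wrapped TopDocs collector.
            super.collect(doc);
        }
    }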
@@ -136,7 +151,19 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements

        try
        {
            /*
             * Collect the ACL IDs that match the authorities.
             * This is done by querying the ACL records in the index. See the method for more
             * documentation on this query.
             */
            HybridBitSet aclSet = getACLSet(auths, QueryConstants.FIELD_READER, solrIndexSearcher);

            /*
             * Collect the documents that the user owns.
             */
            BitsFilter ownerFilter = getOwnerFilter(auths, solrIndexSearcher);

            if (globalReaders.contains(PermissionService.OWNER_AUTHORITY))
@@ -251,6 +278,11 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
        }
    }

    /*
     * The AccessControlCollector applies the ACL logic given the aclIds and the ownerFilter.
     */
    class AccessControlCollector extends DelegatingCollector
    {
        private HybridBitSet aclIds;
@@ -276,6 +308,12 @@ public class SolrAuthoritySetQuery extends AbstractAuthoritySetQuery implements
            this.ownerDocs = ownerFilter.getBitSets().get(context.ord);
        }

        /*
         * The collect method is applied to each document that matches the
         * query. The document's aclId must be in the set of aclIds passed into the collector,
         * or the document's id must be in the ownerDocs.
         */
        public void collect(int doc) throws IOException
        {
            long aclId = this.fieldValues.get(doc);
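A hedged sketch of the admission test this comment describes; java.util.Set and Lucene's FixedBitSet stand in for HybridBitSet and the owner-docs bitset used by the real collector:

    import java.util.Set;
    import org.apache.lucene.util.FixedBitSet;

    class AclAdmissionSketch
    {
        Set<Long> aclIds;      // ACL IDs readable by the user
        FixedBitSet ownerDocs; // segment-local ids of documents the user owns

        // Admit the document when its ACL is readable by the user or the
        // user owns it; the real collect(int) delegates only for such docs.
        boolean allowed(int doc, long aclId)
        {
            return aclIds.contains(aclId) || (ownerDocs != null && ownerDocs.get(doc));
        }
    }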
@@ -180,6 +180,11 @@ public abstract class AbstractTracker implements Tracker

        try
        {
            /*
             * The runLock ensures that for each tracker type (metadata, content, commit, cascade) only one tracker will
             * be running at a time.
             */
            runLock.acquire();

            if(state==null && Boolean.parseBoolean(System.getProperty("alfresco.test", "false")))
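A minimal sketch of the run-lock pattern, assuming a java.util.concurrent.Semaphore with one permit per tracker type (the diff does not show how runLock is constructed):

    import java.util.concurrent.Semaphore;

    class TrackerRunSketch
    {
        // One permit: a second run of the same tracker type blocks until the
        // current run releases the lock.
        private final Semaphore runLock = new Semaphore(1);

        void track() throws InterruptedException
        {
            runLock.acquire();
            try
            {
                // ... one tracking pass ...
            }
            finally
            {
                runLock.release();
            }
        }
    }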
@@ -131,6 +131,18 @@ public class CommitTracker extends AbstractTracker

        //See if we need a rollback
        if(metadataTracker.getRollback() || aclTracker.getRollback()) {

            /*
             * The metadataTracker and aclTracker will return true if an unhandled exception has occurred during indexing.
             *
             * The doRollback method rolls the index back to the state that it was in at the last commit. This will undo
             * all the work that has been done by other trackers after the last commit.
             *
             * The state of the other trackers is then set to null so the trackers will initialize their state from
             * the index, rather than the in-memory state. This keeps the trackers in sync with the index if their work is
             * rolled back.
             */
            doRollback();
            return;
        }
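A hedged sketch of that rollback handshake; getRollback() matches the comment above, while invalidateState() and rollbackIndexToLastCommit() are hypothetical stand-ins for code not shown in this diff:

    interface RollbackAwareTracker
    {
        boolean getRollback();  // true after an unhandled indexing exception
        void invalidateState(); // hypothetical: sets the tracker's state to null
    }

    class RollbackSketch
    {
        void maybeRollback(RollbackAwareTracker metadataTracker, RollbackAwareTracker aclTracker)
        {
            if (metadataTracker.getRollback() || aclTracker.getRollback())
            {
                // Undo everything indexed since the last commit...
                rollbackIndexToLastCommit();
                // ...then force every tracker to re-read its state from the
                // index instead of trusting now-stale in-memory state.
                metadataTracker.invalidateState();
                aclTracker.invalidateState();
            }
        }

        void rollbackIndexToLastCommit() { /* e.g. IndexWriter.rollback(); not shown in the diff */ }
    }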
@@ -143,6 +143,15 @@ public class MetadataTracker extends AbstractTracker implements Tracker
        if(!isMaster && isSlave)
        {
            /*
             * Dynamic registration.
             * This section allows Solr's master/slave setup to be used with dynamic shard registration.
             * In this scenario the slave is polling a "tracking" Solr node. The code below calls
             * the repo to register the state of the node without pulling any real transactions from the repo.
             *
             * This allows the repo to register the replica so that it will be included in queries. But the slave Solr node
             * will pull its data from a "tracking" Solr node using Solr's master/slave replication, rather than tracking the repository.
             */
            ShardState shardstate = getShardState();
            client.getTransactions(0L, null, 0L, null, 0, shardstate);
@@ -610,6 +619,11 @@ public class MetadataTracker extends AbstractTracker implements Tracker
    {
        try
        {
            /*
             * This write lock is used to lock out the Commit Tracker. This ensures that the MetadataTracker will
             * not be indexing content while commits or rollbacks are occurring.
             */
            getWriteLock().acquire();

            /*
@@ -621,6 +635,26 @@ public class MetadataTracker extends AbstractTracker implements Tracker
        this.state = getTrackerState();

        /*
         * The fromCommitTime tells getSomeTransactions() where to start; this is actually fairly straightforward.
         *
         * What makes this code so tricky to understand is the state.getTimeToStopIndexing().
         *
         * There are two scenarios to keep in mind:
         *
         * 1) Full re-index: In this scenario the state.getTimeToStopIndexing() will never stop the indexing.
         *
         * 2) Up-to-date indexing: This is where state.getTimeToStopIndexing() gets interesting. In this scenario
         * the Solr index is already up to date with the repo and it is tracking new transactions. The state.getTimeToStopIndexing()
         * in this scenario causes the getSomeTransactions() call to stop returning results if it finds a transaction
         * beyond a specific point in time. This will break out of this loop and end the tracker run.
         *
         * The next time the metadata tracker runs, the "continueState()" method applies the "hole retention"
         * to state.getLastGoodTxCommitTimeInIndex(). This causes the state.getLastGoodTxCommitTimeInIndex() to scan
         * for prior transactions that might have been missed.
         */
        Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex());
        transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000,
                state.getTimeToStopIndexing());
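A hedged sketch of the stop condition in the up-to-date scenario; the Tx class and loop shape are simplified stand-ins for getSomeTransactions(), whose body this diff does not show:

    import java.util.ArrayList;
    import java.util.List;

    class StopConditionSketch
    {
        static class Tx
        {
            final long commitTimeMs;
            Tx(long commitTimeMs) { this.commitTimeMs = commitTimeMs; }
        }

        // Full re-index: timeToStopIndexing is effectively unbounded, so the
        // loop never breaks early. Up-to-date tracking: the first transaction
        // past the cutoff ends the batch and, in the caller, the tracker run.
        static List<Tx> someTransactions(List<Tx> pulled, long timeToStopIndexing)
        {
            List<Tx> batch = new ArrayList<>();
            for (Tx tx : pulled)
            {
                if (tx.commitTimeMs > timeToStopIndexing)
                {
                    break; // hole retention re-checks this region on the next run
                }
                batch.add(tx);
            }
            return batch;
        }
    }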
@@ -640,6 +674,22 @@ public class MetadataTracker extends AbstractTracker implements Tracker

        ArrayList<Transaction> txBatch = new ArrayList<>();
        for (Transaction info : transactions.getTransactions()) {

            /*
             * isInIndex is used to ensure transactions that are being re-pulled due to "hole retention" are not re-indexed if
             * they have already been indexed.
             *
             * The logic in infoSrv.txnInIndex() first checks an in-memory LRU cache for the txnId. If it doesn't find it in the cache
             * it checks the index. The LRU cache is only needed for txnIds that have been indexed but are not yet visible in the index for
             * one of two reasons:
             *
             * 1) The commit tracker has not yet committed the transaction.
             * 2) The txnId has been committed to the index but the new searcher has not yet been warmed.
             *
             * This means that to ensure txnIds are not needlessly reprocessed during hole retention, the LRU cache must be large
             * enough to cover the time between when a txnId is indexed and when it becomes visible.
             */
            boolean isInIndex = (infoSrv.txnInIndex(info.getId(), true) && info.getCommitTimeMs() <= state.getLastIndexedTxCommitTime());
            if (isInIndex) {
                txnsFound.add(info);
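A hedged sketch of the cache-then-index check described above; the LinkedHashMap-based LRU cache and the 10,000-entry bound are stand-ins, since the diff does not show what infoSrv.txnInIndex() actually uses:

    import java.util.LinkedHashMap;
    import java.util.Map;

    class TxnInIndexSketch
    {
        // LRU cache of txnIds that were indexed but may not be visible yet
        // (uncommitted, or committed with no warmed searcher).
        private final Map<Long, Boolean> recentTxnIds =
                new LinkedHashMap<Long, Boolean>(16, 0.75f, true)
                {
                    @Override
                    protected boolean removeEldestEntry(Map.Entry<Long, Boolean> eldest)
                    {
                        // Must cover the window between indexing a txnId and
                        // it becoming visible, or hole retention re-indexes it.
                        return size() > 10_000;
                    }
                };

        boolean txnInIndex(long txnId)
        {
            if (recentTxnIds.containsKey(txnId))
            {
                return true; // indexed, possibly not yet visible
            }
            return txnVisibleInIndex(txnId); // fall back to querying the index
        }

        boolean txnVisibleInIndex(long txnId) { return false; /* index lookup stub */ }
    }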