alfresco-community-repo/source/java/org/alfresco/repo/node/index/FullIndexRecoveryComponent.java
Derek Hulley 6e225ce15d Merged V2.9 to HEAD
9018: MT: system-wide jobs should run across all stores/indexes
   9204: Merged V2.2 to V2.9
      8633: Merged V2.1 to V2.2
         8629: Merged V2.1-A to V2.1
            8493: Fixed ADB-51: ImporterBootstrap doesn't use transaction retrying
            8494: EHCache and JGroup patches and upgrades
            8546: ACT-1650: performance optimization
            8550: Fixes to transactional cache handling
            8553: Fixed tests: MLText is a Map, but will always have at least one entry, even that entry is null.
            8583: ACT-954: IndexInfo files now reopen when they close (for whatever reason)
      8640: Merged V2.1 to V2.2
         8638: Used correct exception type for IO channel reopen logic
      9102: Unit test to check that transactional cache size overrun is handled
      9106: Merged V2.1 to V2.2
         9043: Fixed AR-2291: SchemaBootstrap lock is only required before first SQL execution
         9045: Fix AR-2291: SchemaBootstrap lock is only required before first SQL execution
         9047: Fixed AR-2305: Index tracking in AUTO mode doesn't report anything on bootstrap
         9048: Fixed AR-2300: Random-based GUID instead of time-based GUIDs
         9049: Fix patches to only run once
         9050 <Deferred>: Changed getString() method to use the available buffer length rather than a hard-coded value.
         9060: Fixed ETWOONE-109 and ETWOONE-128: RetryingTransactionHelper fixes and improvements
         9061: Fixed NodeRefPropertyMethodInterceptorTest
         9075 <Deferred>: Added delete permission check when marking a file for delete on close. ETWOONE-141/ACT-2416.
         9080: Fixed EHCache source zip
         9081: Fixed ETWOONE-118: Tomcat failed bootstrap doesn't clean up EHCache cluster structures
         9085: Fixed ETWOONE-154: Added JSR107 Jar to WAR
      9115: Fixed test: TransactionalCache uses LRU so repeatedly checking if an entry is there keeps it in the cache.
   9206: Merged V2.2 to V2.9
      8857: Improvements to ACL performance for large ACLs
      8951: Always check permission entry changes are made at position 0
   9219 <No change>: Made NTLMLogonDetails class Serializable, port of r8973.
   9220: Added delete permission check when marking a file for delete on close. Port of r9075.
   9222: Merged V2.1 to V2.9
      8683: Early warning for nodes indexed in the wrong store (ACT-964)
      8684: Enhanced tests
      8685: Enhanced tests
      8686: Additional tests
   9223: Merged V2.2 to V2.9
      9120: Merged V2.1 to V2.2
         8740: Fix for AR-2173 - do not recheck the case of the user name when validating tickets (it has already been done)
      9122: Additional unit test from support case.
   9224: Merged V2.2 to V2.9
      9076: Fixed ETWOTWO-426: Upgrading alfresco from 2.1.1 to 2.2 throws errors with Mysql 5.0.51
      9104: Merged V2.1 to V2.2
         9025: Fixed AR-2314, AR-2299: Optimizations after profiling
      9105: Merged V2.1 to V2.2
         8745: Fix AR-2233 (regression introduced by fix for AR-2221)
      9121: Merged V2.1 to V2.2
         9017: Fix index back up failing due to background index merge/deletions (includes back port of CHK-2588)
      9137: Incorporated additions from Will into AVM console (ETWOTWO-439)
   9225: Merged V2.1 to V2.9
      8641: Merged V2.1-A to V2.1
         7729: Fix to Repository Web Service (queryAssociated) to allow reverse association lookup (ie. given target, get the source)
      8673: Fix for AR-2098 - shorter URL form now has NTLM filter mapping example in web.xml
      8682: Fix for AR-2005
      8695: AR-2054.
      8696: Improved sort test to include prefix form of field name
   9226: Fix ALFCOM-994 (see also earlier change in r9223)


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@9233 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2008-05-22 12:40:46 +00:00

390 lines
15 KiB
Java

/*
* Copyright (C) 2005-2007 Alfresco Software Limited.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have received a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing"
*/
package org.alfresco.repo.node.index;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.domain.Transaction;
import org.alfresco.repo.node.index.IndexTransactionTracker.IndexTransactionTrackerListener;
import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback;
import org.alfresco.service.cmr.repository.ChildAssociationRef;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeRef.Status;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Component to check and recover the indexes. By default, the server is
* put into read-only mode during the reindex process in order to prevent metadata changes.
* This is not critical and can be {@link #setLockServer(boolean) switched off} if the
* server is required immediately.
* <p>
*
* @see RecoveryMode
*
* @author Derek Hulley
*/
public class FullIndexRecoveryComponent extends AbstractReindexComponent
{
    private static final String ERR_INDEX_OUT_OF_DATE = "index.recovery.out_of_date";
    private static final String MSG_TRACKING_STARTING = "index.tracking.starting";
    private static final String MSG_TRACKING_COMPLETE = "index.tracking.complete";
    private static final String MSG_TRACKING_PROGRESS = "index.tracking.progress";
    private static final String MSG_RECOVERY_STARTING = "index.recovery.starting";
    private static final String MSG_RECOVERY_COMPLETE = "index.recovery.complete";
    private static final String MSG_RECOVERY_PROGRESS = "index.recovery.progress";
    private static final String MSG_RECOVERY_TERMINATED = "index.recovery.terminated";
    private static final String MSG_RECOVERY_ERROR = "index.recovery.error";

    /** Maximum number of transactions fetched from the DB per iteration of the full recovery loop. */
    private static final int MAX_TRANSACTIONS_PER_ITERATION = 1000;

    // FIX: made final - the logger reference is never reassigned
    private static final Log logger = LogFactory.getLog(FullIndexRecoveryComponent.class);

    /**
     * The level of index checking and recovery to perform on startup.
     */
    public static enum RecoveryMode
    {
        /** Do nothing - not even a check. */
        NONE,
        /**
         * Perform a quick check on the state of the indexes only.  This only checks that the
         * first N transactions are present in the index and doesn't guarantee that the indexes
         * are wholly consistent.  Normally, the indexes are consistent up to a certain time.
         * The system does a precautionary index top-up by default, so the last transactions are
         * not validated.
         */
        VALIDATE,
        /**
         * Performs a validation and starts a recovery if necessary.  In this mode, if start
         * transactions are missing then FULL mode is enabled.  If end transactions are missing
         * then the indexes will be "topped up" to bring them up to date.
         */
        AUTO,
        /**
         * Performs a full pass-through of all recorded transactions to ensure that the indexes
         * are up to date.
         */
        FULL;
    }

    private RecoveryMode recoveryMode;
    private boolean lockServer;
    private IndexTransactionTracker indexTracker;
    private boolean stopOnError;

    /**
     * Defaults:
     * <ul>
     *   <li><b>recoveryMode:</b> VALIDATE</li>
     *   <li><b>lockServer:</b> true</li>
     *   <li><b>stopOnError:</b> true</li>
     * </ul>
     */
    public FullIndexRecoveryComponent()
    {
        recoveryMode = RecoveryMode.VALIDATE;
        // FIX: the class and setter Javadoc both document 'true' as the default for these
        // two flags, but the fields previously took the implicit 'false'.  The code is
        // aligned with the documented contract here; the usual Spring configuration sets
        // both properties explicitly, so configured deployments are unaffected.
        lockServer = true;
        stopOnError = true;
    }

    /**
     * Set the type of recovery to perform.  Default is {@link RecoveryMode#VALIDATE to validate}
     * the indexes only.
     *
     * @param recoveryMode one of the {@link RecoveryMode } values
     * @throws IllegalArgumentException if the string does not name a {@link RecoveryMode} constant
     */
    public void setRecoveryMode(String recoveryMode)
    {
        this.recoveryMode = RecoveryMode.valueOf(recoveryMode);
    }

    /**
     * Set this on to put the server into READ-ONLY mode for the duration of the index recovery.
     * The default is <tt>true</tt>, i.e. the server will be locked against further updates.
     *
     * @param lockServer true to force the server to be read-only
     */
    public void setLockServer(boolean lockServer)
    {
        this.lockServer = lockServer;
    }

    /**
     * Set the tracker that will be used for AUTO mode.
     *
     * @param indexTracker an index tracker component
     */
    public void setIndexTracker(IndexTransactionTracker indexTracker)
    {
        this.indexTracker = indexTracker;
    }

    /**
     * Set whether a full rebuild should stop in the event of encountering an error.  The default is
     * to stop reindexing, and this will lead to the server startup failing when index recovery mode
     * is <b>FULL</b>.  Sometimes, it is necessary to start the server up regardless of any errors
     * with particular nodes.
     *
     * @param stopOnError <tt>true</tt> to stop reindexing when an error is encountered.
     */
    public void setStopOnError(boolean stopOnError)
    {
        this.stopOnError = stopOnError;
    }

    /**
     * Checks the state of the indexes against the first and last recorded transactions and
     * applies the configured {@link RecoveryMode}.  The server is optionally made read-only
     * for the duration (restored in a <tt>finally</tt> so a failure cannot leave the server
     * locked).
     */
    @Override
    protected void reindexImpl()
    {
        if (logger.isDebugEnabled())
        {
            logger.debug("Performing index recovery for type: " + recoveryMode);
        }

        // Ignore when NONE
        if (recoveryMode == RecoveryMode.NONE)
        {
            return;
        }

        // put the server into read-only mode for the duration
        boolean allowWrite = !transactionService.isReadOnly();
        try
        {
            if (lockServer)
            {
                // set the server into read-only mode
                transactionService.setAllowWrite(false);
            }

            // Check that the first and last meaningful transactions are indexed.
            // Only a sample (10) from each end is checked - this is a heuristic, not a full scan.
            List<Transaction> startTxns = nodeDaoService.getTxnsByCommitTimeAscending(
                    Long.MIN_VALUE, Long.MAX_VALUE, 10, null);
            boolean startAllPresent = areTxnsInIndex(startTxns);
            List<Transaction> endTxns = nodeDaoService.getTxnsByCommitTimeDescending(
                    Long.MIN_VALUE, Long.MAX_VALUE, 10, null);
            boolean endAllPresent = areTxnsInIndex(endTxns);

            // check the level of cover required
            switch (recoveryMode)
            {
            case AUTO:
                if (!startAllPresent)
                {
                    // Initial transactions are missing - rebuild
                    performFullRecovery();
                }
                else if (!endAllPresent)
                {
                    // Only the tail is missing - top the indexes up
                    performPartialRecovery();
                }
                break;
            case VALIDATE:
                // Check only - report but don't fix
                if (!startAllPresent || !endAllPresent)
                {
                    // Index is out of date
                    logger.warn(I18NUtil.getMessage(ERR_INDEX_OUT_OF_DATE));
                }
                break;
            case FULL:
                performFullRecovery();
                break;
            }
        }
        finally
        {
            // restore read-only state
            transactionService.setAllowWrite(allowWrite);
        }
    }

    /**
     * Tops up the indexes using the {@link IndexTransactionTracker}, logging progress
     * periodically via a temporary listener.  The listener is always deregistered in a
     * <tt>finally</tt> block.
     */
    private void performPartialRecovery()
    {
        // Log the AUTO recovery progress
        IndexTransactionTrackerListener trackerListener = new IndexTransactionTrackerListener()
        {
            long lastLogged = 0L;
            public void indexedTransactions(long fromTimeInclusive, long toTimeExclusive)
            {
                long now = System.currentTimeMillis();
                if (now - lastLogged < 10000L)
                {
                    // FIX: comment previously said "once a minute", but the threshold is
                    // 10000 ms - don't log more than once every 10 seconds
                    return;
                }
                lastLogged = now;
                // Log it
                Date toTimeDate = new Date(toTimeExclusive);
                String msgAutoProgress = I18NUtil.getMessage(MSG_TRACKING_PROGRESS, toTimeDate.toString());
                logger.info(msgAutoProgress);
            }
        };
        try
        {
            // Register the listener
            indexTracker.setListener(trackerListener);
            // Trigger the tracker, which will top up the indexes
            logger.info(I18NUtil.getMessage(MSG_TRACKING_STARTING));
            indexTracker.reindex();
            logger.info(I18NUtil.getMessage(MSG_TRACKING_COMPLETE));
        }
        finally
        {
            // Remove the listener
            indexTracker.setListener(null);
        }
    }

    /**
     * Walks every recorded transaction in commit-time order and reindexes each one.
     * Progress is reported at every 10% boundary.  Individual transaction failures either
     * abort the recovery (when {@link #setStopOnError(boolean) stopOnError} is set) or are
     * logged and skipped.  Returns early if the component is shutting down.
     */
    private void performFullRecovery()
    {
        int txnCount = nodeDaoService.getTransactionCount();
        // starting
        String msgStart = I18NUtil.getMessage(MSG_RECOVERY_STARTING, txnCount);
        logger.info(msgStart);

        // count the transactions
        int processedCount = 0;
        long fromTimeInclusive = Long.MIN_VALUE;
        long toTimeExclusive = Long.MAX_VALUE;
        List<Long> lastTxnIds = Collections.<Long>emptyList();
        while (true)
        {
            List<Transaction> nextTxns = nodeDaoService.getTxnsByCommitTimeAscending(
                    fromTimeInclusive,
                    toTimeExclusive,
                    MAX_TRANSACTIONS_PER_ITERATION,
                    lastTxnIds);
            lastTxnIds = new ArrayList<Long>(nextTxns.size());
            // reindex each transaction
            for (Transaction txn : nextTxns)
            {
                Long txnId = txn.getId();
                // Keep it to ensure we exclude it from the next iteration
                lastTxnIds.add(txnId);
                // check if we have to terminate
                if (isShuttingDown())
                {
                    String msgTerminated = I18NUtil.getMessage(MSG_RECOVERY_TERMINATED);
                    logger.warn(msgTerminated);
                    return;
                }
                // Allow exception to bubble out or not
                if (stopOnError)
                {
                    reindexTransaction(txnId);
                }
                else
                {
                    try
                    {
                        reindexTransaction(txnId);
                    }
                    catch (Throwable e)
                    {
                        // Deliberately broad catch: best-effort mode must survive any
                        // failure in a single transaction and carry on with the rest.
                        String msgError = I18NUtil.getMessage(MSG_RECOVERY_ERROR, txnId, e.getMessage());
                        // FIX: was logged at INFO; a reindex failure warrants WARN
                        logger.warn(msgError, e);
                    }
                }
                // Although we use the same time as this transaction for the next iteration, we also
                // make use of the exclusion list to ensure that it doesn't get pulled back again.
                fromTimeInclusive = txn.getCommitTimeMs();

                // dump a progress report every 10% of the way
                double before = (double) processedCount / (double) txnCount * 10.0;   // 0 - 10
                processedCount++;
                double after = (double) processedCount / (double) txnCount * 10.0;    // 0 - 10
                if (Math.floor(before) < Math.floor(after))                           // crossed a 0 - 10 integer boundary
                {
                    int complete = ((int) Math.floor(after)) * 10;
                    String msgProgress = I18NUtil.getMessage(MSG_RECOVERY_PROGRESS, complete);
                    logger.info(msgProgress);
                }
            }
            // have we finished?
            if (nextTxns.size() == 0)
            {
                // there are no more
                break;
            }
        }
        // done
        String msgDone = I18NUtil.getMessage(MSG_RECOVERY_COMPLETE);
        logger.info(msgDone);
    }

    /**
     * Perform a full reindexing of the given transaction in the context of a completely
     * new transaction.  Deleted nodes have their child association removed from the index;
     * all other nodes are (re)indexed.  Nodes with no status are silently skipped.
     *
     * @param txnId the transaction identifier
     */
    public void reindexTransaction(final long txnId)
    {
        if (logger.isDebugEnabled())
        {
            logger.debug("Reindexing transaction: " + txnId);
        }

        RetryingTransactionCallback<Object> reindexWork = new RetryingTransactionCallback<Object>()
        {
            public Object execute() throws Exception
            {
                // get the node references pertinent to the transaction
                List<NodeRef> nodeRefs = nodeDaoService.getTxnChanges(txnId);
                // reindex each node
                for (NodeRef nodeRef : nodeRefs)
                {
                    Status nodeStatus = nodeService.getNodeStatus(nodeRef);
                    if (nodeStatus == null)
                    {
                        // it's not there any more
                        continue;
                    }
                    if (nodeStatus.isDeleted())                                 // node deleted
                    {
                        // only the child node ref is relevant
                        ChildAssociationRef assocRef = new ChildAssociationRef(
                                ContentModel.ASSOC_CHILDREN,
                                null,
                                null,
                                nodeRef);
                        indexer.deleteNode(assocRef);
                    }
                    else                                                        // node created
                    {
                        // reindex
                        indexer.updateNode(nodeRef);
                    }
                }
                // done
                return null;
            }
        };
        // Run read-only=true? No: second flag requires a new (non-propagated) transaction
        transactionService.getRetryingTransactionHelper().doInTransaction(reindexWork, true, true);
        // done
    }
}