Content URLs are now generated with an extra HOUR folder to better handle a high volume of input within a single day

Added cleanup job for content stores
 - content is moved into (alf_data)/contentstore.deleted and mirrors the live content store
 - We'll make a call about disabling the trigger for the job, but currently it will fire at 4am


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2422 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2006-02-16 20:01:57 +00:00
parent 530b2b9026
commit 440fa299b4
15 changed files with 621 additions and 252 deletions

View File

@@ -0,0 +1,218 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.cleanup;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.repo.node.db.NodeDaoService;
import org.alfresco.repo.transaction.TransactionUtil;
import org.alfresco.repo.transaction.TransactionUtil.TransactionWork;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.transaction.TransactionService;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * This component is responsible for finding orphaned content in a given
 * content store or stores.  Deletion handlers can be provided to ensure
 * that the content is moved to another location prior to being removed
 * from the store(s) being cleaned.
 * <p>
 * A URL is treated as orphaned when it is present in a store but does not
 * appear among the content URLs extracted from the content data strings
 * supplied by the {@link NodeDaoService}.
 *
 * @author Derek Hulley
 */
public class ContentStoreCleaner
{
    private static final Log logger = LogFactory.getLog(ContentStoreCleaner.class);

    /** Number of milliseconds in one day, used to convert {@link #protectDays} */
    private static final long MILLIS_PER_DAY = 24L * 3600L * 1000L;

    private DictionaryService dictionaryService;
    private NodeDaoService nodeDaoService;
    private TransactionService transactionService;
    private List<ContentStore> stores;
    private List<ContentStoreCleanerListener> listeners;
    private int protectDays;

    /**
     * Creates a cleaner with no stores, no listeners and a protection
     * period of 7 days.
     */
    public ContentStoreCleaner()
    {
        this.stores = new ArrayList<ContentStore>(0);
        this.listeners = new ArrayList<ContentStoreCleanerListener>(0);
        this.protectDays = 7;
    }

    /**
     * @param dictionaryService used to determine which properties are content properties
     */
    public void setDictionaryService(DictionaryService dictionaryService)
    {
        this.dictionaryService = dictionaryService;
    }

    /**
     * @param nodeDaoService used to get the property values
     */
    public void setNodeDaoService(NodeDaoService nodeDaoService)
    {
        this.nodeDaoService = nodeDaoService;
    }

    /**
     * @param transactionService the component to ensure proper transactional wrapping
     */
    public void setTransactionService(TransactionService transactionService)
    {
        this.transactionService = transactionService;
    }

    /**
     * @param stores the content stores to clean
     */
    public void setStores(List<ContentStore> stores)
    {
        this.stores = stores;
    }

    /**
     * @param listeners the listeners that can react to deletions
     */
    public void setListeners(List<ContentStoreCleanerListener> listeners)
    {
        this.listeners = listeners;
    }

    /**
     * Set the minimum number of days old that orphaned content must be
     * before deletion is possible.  The default is 7 days.
     *
     * @param protectDays minimum age (in days) of deleted content
     */
    public void setProtectDays(int protectDays)
    {
        this.protectDays = protectDays;
    }

    /**
     * Perform basic checks to ensure that the necessary dependencies were injected.
     *
     * @throws AlfrescoRuntimeException if a required property is missing or,
     *      in the case of the stores, empty
     */
    private void checkProperties()
    {
        if (dictionaryService == null)
        {
            throw new AlfrescoRuntimeException("Property 'dictionaryService' not set");
        }
        if (nodeDaoService == null)
        {
            throw new AlfrescoRuntimeException("Property 'nodeDaoService' not set");
        }
        if (transactionService == null)
        {
            throw new AlfrescoRuntimeException("Property 'transactionService' not set");
        }
        if (stores == null || stores.size() == 0)
        {
            throw new AlfrescoRuntimeException("Property 'stores' not set");
        }
        if (listeners == null)
        {
            throw new AlfrescoRuntimeException("Property 'listeners' not set");
        }
    }

    /**
     * Fetches all content data strings from the node DAO and extracts the
     * content URL from each of them.
     *
     * @return the set of content URLs referenced by the metadata; entries
     *      without a URL are skipped
     */
    private Set<String> getValidUrls()
    {
        // wrap to make the request in a transaction
        TransactionWork<List<String>> getUrlsWork = new TransactionWork<List<String>>()
        {
            public List<String> doWork() throws Exception
            {
                return nodeDaoService.getContentDataStrings();
            }
        };
        // execute in READ-ONLY txn
        List<String> contentDataStrings = TransactionUtil.executeInUserTransaction(
                transactionService,
                getUrlsWork,
                true);

        // get all valid URLs
        Set<String> validUrls = new HashSet<String>(contentDataStrings.size());
        // convert the strings to objects and extract the URL
        for (String contentDataString : contentDataStrings)
        {
            ContentData contentData = ContentData.createContentProperty(contentDataString);
            String contentUrl = contentData.getContentUrl();
            if (contentUrl != null)
            {
                // a URL was present
                validUrls.add(contentUrl);
            }
        }
        // done
        if (logger.isDebugEnabled())
        {
            logger.debug("Found " + validUrls.size() + " valid URLs in metadata");
        }
        return validUrls;
    }

    /**
     * Checks the configuration, collects the URLs that are still referenced
     * and then cleans each of the configured stores in turn.
     *
     * @throws AlfrescoRuntimeException if a required property has not been set
     */
    public void execute()
    {
        checkProperties();
        Set<String> validUrls = getValidUrls();
        // now clean each store in turn
        for (ContentStore store : stores)
        {
            clean(validUrls, store);
        }
    }

    /**
     * Removes from the given store every URL that is older than the
     * protection period and is not one of the valid URLs.  Each listener is
     * handed its own reader onto the content before the content is deleted;
     * an exception thrown by a listener propagates before the delete call is
     * reached.
     *
     * @param validUrls the URLs that are still referenced and must be kept
     * @param store the store to clean
     */
    private void clean(Set<String> validUrls, ContentStore store)
    {
        Date checkAllBeforeDate = new Date(System.currentTimeMillis() - (long) protectDays * MILLIS_PER_DAY);
        // Copy the store's URLs: the returned set is not ours to modify and
        // nothing here guarantees that it is mutable or detached from the store
        Set<String> orphanedUrls = new HashSet<String>(store.getUrls(null, checkAllBeforeDate));
        // remove all URLs that occur in the validUrls
        orphanedUrls.removeAll(validUrls);
        // now clean the store
        for (String url : orphanedUrls)
        {
            ContentReader sourceReader = store.getReader(url);
            // announce this to the listeners
            for (ContentStoreCleanerListener listener : listeners)
            {
                // get a fresh reader
                ContentReader listenerReader = sourceReader.getReader();
                // call it
                listener.beforeDelete(listenerReader);
            }
            // delete it
            store.delete(url);

            if (logger.isDebugEnabled())
            {
                logger.debug("Removed URL from store: \n" +
                        " Store: " + store + "\n" +
                        " URL: " + url);
            }
        }
    }
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.cleanup;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
/**
 * Callback contract for components that are plugged into a
 * {@link org.alfresco.repo.content.cleanup.ContentStoreCleaner cleaner} in
 * order to move soon-to-be-deleted content to a new location.
 *
 * @author Derek Hulley
 */
public interface ContentStoreCleanerListener
{
    /**
     * Called with a reader onto content that is about to be deleted.
     *
     * @param reader a reader onto the content that is about to be removed
     * @throws ContentIOException may be thrown by implementations
     */
    void beforeDelete(ContentReader reader) throws ContentIOException;
}

View File

@@ -0,0 +1,115 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.cleanup;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.repo.content.filestore.FileContentStore;
import org.alfresco.repo.node.db.NodeDaoService;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.transaction.TransactionService;
import org.alfresco.util.ApplicationContextHelper;
import org.alfresco.util.TempFileProvider;
import org.springframework.context.ApplicationContext;
import junit.framework.TestCase;
/**
 * Exercises the {@link org.alfresco.repo.content.cleanup.ContentStoreCleaner}
 * against a file-based store, with and without a protection period.
 *
 * @see org.alfresco.repo.content.cleanup.ContentStoreCleaner
 *
 * @author Derek Hulley
 */
public class ContentStoreCleanerTest extends TestCase
{
    private static final ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();

    private ContentStoreCleaner cleaner;
    private ContentStore store;
    private ContentStoreCleanerListener listener;
    /** URLs reported to the listener, in the order they were announced */
    private List<String> deletedUrls;

    @Override
    public void setUp() throws Exception
    {
        // look up the collaborating services
        ServiceRegistry registry = (ServiceRegistry) ctx.getBean("ServiceRegistry");
        TransactionService txnService = registry.getTransactionService();
        DictionaryService dictService = registry.getDictionaryService();
        NodeDaoService daoService = (NodeDaoService) ctx.getBean("nodeDaoService");

        // the store to clean, the listener to notify and the record it writes to
        store = new FileContentStore(TempFileProvider.getTempDir().getAbsolutePath());
        listener = new DummyCleanerListener();
        deletedUrls = new ArrayList<String>(5);

        // wire up the cleaner under test
        cleaner = new ContentStoreCleaner();
        cleaner.setTransactionService(txnService);
        cleaner.setDictionaryService(dictService);
        cleaner.setNodeDaoService(daoService);
        cleaner.setStores(Collections.singletonList(store));
        cleaner.setListeners(Collections.singletonList(listener));
    }

    /**
     * Writes a small piece of content directly into the store.
     *
     * @return the URL of the newly written content
     */
    private String writeContentToStore() throws Exception
    {
        ContentWriter contentWriter = store.getWriter(null, null);
        contentWriter.putContent("ABC");
        return contentWriter.getContentUrl();
    }

    /**
     * With no protection period, newly written content is expected to be
     * removed and reported to the listener.
     */
    public void testImmediateRemoval() throws Exception
    {
        cleaner.setProtectDays(0);
        String url = writeContentToStore();

        // run the cleanup
        cleaner.execute();

        // the content is expected to be gone and the listener to have seen it
        assertFalse("Unprotected content was not deleted", store.exists(url));
        assertTrue("Content listener was not called with deletion", deletedUrls.contains(url));
    }

    /**
     * With a protection period of one day, newly written content is expected
     * to survive and the listener must not be told about it.
     */
    public void testProtectedRemoval() throws Exception
    {
        cleaner.setProtectDays(1);
        String url = writeContentToStore();

        // run the cleanup
        cleaner.execute();

        // the content is expected to remain as it is within the protection period
        assertTrue("Protected content was deleted", store.exists(url));
        assertFalse("Content listener was called with deletion of protected URL", deletedUrls.contains(url));
    }

    /**
     * Listener that records the URL of every reader it is given.
     */
    private class DummyCleanerListener implements ContentStoreCleanerListener
    {
        public void beforeDelete(ContentReader reader) throws ContentIOException
        {
            String announcedUrl = reader.getContentUrl();
            deletedUrls.add(announcedUrl);
        }
    }
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.cleanup;
import org.alfresco.error.AlfrescoRuntimeException;
import org.quartz.Job;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
/**
 * Job that triggers the deletion of unused content by delegating to a
 * {@link org.alfresco.repo.content.cleanup.ContentStoreCleaner}.
 * <p>
 * The following parameters are required:
 * <ul>
 *   <li><b>contentStoreCleaner</b>: The content store cleaner bean</li>
 * </ul>
 *
 * @author Derek Hulley
 */
public class ContentStoreCleanupJob implements Job
{
    /**
     * Default no-argument constructor.
     */
    public ContentStoreCleanupJob()
    {
    }

    /**
     * Looks up the cleaner in the job data and asks it to do its work.
     *
     * @param context the execution context carrying the job data map
     * @throws AlfrescoRuntimeException if the job data has no
     *      <b>contentStoreCleaner</b> entry or the entry is of the wrong type
     */
    public void execute(JobExecutionContext context) throws JobExecutionException
    {
        JobDataMap dataMap = context.getJobDetail().getJobDataMap();
        Object candidate = dataMap.get("contentStoreCleaner");
        // instanceof is false for null, so a missing entry is rejected here as well
        if (candidate instanceof ContentStoreCleaner)
        {
            ((ContentStoreCleaner) candidate).execute();
            return;
        }
        throw new AlfrescoRuntimeException(
                "ContentStoreCleanupJob data must contain valid 'contentStoreCleaner' reference");
    }
}

View File

@@ -0,0 +1,66 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.cleanup;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Cleaner listener that copies content that is about to be deleted into the
 * store configured as the backup store.
 *
 * @author Derek Hulley
 */
public class DeletedContentBackupCleanerListener implements ContentStoreCleanerListener
{
    private static final Log logger = LogFactory.getLog(DeletedContentBackupCleanerListener.class);

    /** the store that receives the content before it is deleted */
    private ContentStore store;

    /**
     * Default no-argument constructor; the target store is supplied via
     * {@link #setStore(ContentStore)}.
     */
    public DeletedContentBackupCleanerListener()
    {
    }

    /**
     * Set the store to copy soon-to-be-deleted content into
     *
     * @param store the deleted content backup store
     */
    public void setStore(ContentStore store)
    {
        this.store = store;
    }

    /**
     * Copies the reader's content into the backup store, requesting a writer
     * for the same content URL as the reader's.
     *
     * @param reader a reader onto the content that is about to be deleted
     */
    public void beforeDelete(ContentReader reader) throws ContentIOException
    {
        String contentUrl = reader.getContentUrl();
        // obtain a writer in the target store for the same URL and copy across
        ContentWriter backupWriter = store.getWriter(null, contentUrl);
        backupWriter.putContent(reader);

        if (!logger.isDebugEnabled())
        {
            return;
        }
        logger.debug("Moved content before deletion: \n" +
                " URL: " + contentUrl + "\n" +
                " Store: " + store);
    }
}