From 38e82ebe68e4d59b56a70712f09855ba0b3b87e7 Mon Sep 17 00:00:00 2001 From: Kevin Roast Date: Tue, 5 Jul 2011 16:03:56 +0000 Subject: [PATCH] SE.S62 Share - DM Remote Store migration patch - WIP git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@28814 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../patch/impl/AVMToADMRemoteStorePatch.java | 545 ++++++++++++++++++ 1 file changed, 545 insertions(+) create mode 100644 source/java/org/alfresco/repo/admin/patch/impl/AVMToADMRemoteStorePatch.java diff --git a/source/java/org/alfresco/repo/admin/patch/impl/AVMToADMRemoteStorePatch.java b/source/java/org/alfresco/repo/admin/patch/impl/AVMToADMRemoteStorePatch.java new file mode 100644 index 0000000000..537530b89b --- /dev/null +++ b/source/java/org/alfresco/repo/admin/patch/impl/AVMToADMRemoteStorePatch.java @@ -0,0 +1,545 @@ +/* + * Copyright (C) 2005-2011 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ */ +package org.alfresco.repo.admin.patch.impl; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.StringTokenizer; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.alfresco.model.ContentModel; +import org.alfresco.repo.admin.patch.AbstractPatch; +import org.alfresco.repo.batch.BatchProcessWorkProvider; +import org.alfresco.repo.batch.BatchProcessor; +import org.alfresco.repo.batch.BatchProcessor.BatchProcessWorker; +import org.alfresco.repo.security.authentication.AuthenticationUtil; +import org.alfresco.repo.security.authentication.AuthenticationUtil.RunAsWork; +import org.alfresco.repo.transaction.AlfrescoTransactionSupport; +import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback; +import org.alfresco.service.cmr.avm.AVMNodeDescriptor; +import org.alfresco.service.cmr.avm.AVMService; +import org.alfresco.service.cmr.model.FileExistsException; +import org.alfresco.service.cmr.model.FileFolderService; +import org.alfresco.service.cmr.model.FileFolderUtil; +import org.alfresco.service.cmr.model.FileInfo; +import org.alfresco.service.cmr.repository.ChildAssociationRef; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.site.SiteInfo; +import org.alfresco.service.cmr.site.SiteService; +import org.alfresco.service.namespace.NamespaceService; +import org.alfresco.service.namespace.QName; +import org.alfresco.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.springframework.extensions.surf.util.I18NUtil; 
+import org.springframework.extensions.surf.util.URLDecoder; + +/** + * Patch to migrate the AVM 'sitestore' Remote Store content to the new ADM + * location for surf-configuration under the Sites folder in 4.0. + * + * @see org.alfresco.repo.web.scripts.bean.ADMRemoteStore + * @author Kevin Roast + * @since 4.0 + */ +public class AVMToADMRemoteStorePatch extends AbstractPatch +{ + private static final Log logger = LogFactory.getLog(AVMToADMRemoteStorePatch.class); + + private static final String MSG_MIGRATION_COMPLETE = "patch.avmToAdmRemoteStore.complete"; + private static final String SITE_CACHE_ID = "_SITE_CACHE"; + + // patterns used to match site and user specific configuration locations + // @see org.alfresco.repo.web.scripts.bean.ADMRemoteStore + private static final Pattern USER_PATTERN_1 = Pattern.compile(".*/components/.*\\.user~(.*)~.*"); + private static final Pattern USER_PATTERN_2 = Pattern.compile(".*/pages/user/(.*?)(/.*)?$"); + private static final Pattern SITE_PATTERN_1 = Pattern.compile(".*/components/.*\\.site~(.*)~.*"); + private static final Pattern SITE_PATTERN_2 = Pattern.compile(".*/pages/site/(.*?)(/.*)?$"); + // name of the surf config folder + private static final String SURF_CONFIG = "surf-config"; + + private static final int BATCH_THREADS = 8; + private static final int BATCH_SIZE = 100; + + private Map> siteReferenceCache = null; + private SortedMap paths; + private SortedMap retryPaths; + private NodeRef surfConfigRef = null; + private ThreadLocal> lastFolderCache = new ThreadLocal>() + { + protected Pair initialValue() + { + return new Pair("", null); + }; + }; + + private ContentService contentService; + private FileFolderService fileFolderService; + private SiteService siteService; + private AVMService avmService; + private String avmStore; + private String avmRootPath = "/"; + + + /** + * @param contentService the ContentService to set + */ + public void setContentService(ContentService contentService) + { + this.contentService 
= contentService; + } + + /** + * @param fileFolderService the FileFolderService to set + */ + public void setFileFolderService(FileFolderService fileFolderService) + { + this.fileFolderService = fileFolderService; + } + + /** + * @param siteService the SiteService to set + */ + public void setSiteService(SiteService siteService) + { + this.siteService = siteService; + } + + /** + * @param avmService the avmService to set + */ + public void setAvmService(AVMService avmService) + { + this.avmService = avmService; + } + + /** + * @param avmStore the avmStore to set + */ + public void setAvmStore(String avmStore) + { + this.avmStore = avmStore; + } + + /** + * @param avmRootPath the avmRootPath to set + */ + public void setAvmRootPath(String avmRootPath) + { + if (avmRootPath != null && avmRootPath.length() != 0) + { + this.avmRootPath = avmRootPath; + } + } + + @Override + protected void checkProperties() + { + super.checkProperties(); + checkPropertyNotNull(avmService, "avmService"); + checkPropertyNotNull(avmStore, "avmStore"); + } + + /* (non-Javadoc) + * @see org.alfresco.repo.admin.patch.AbstractPatch#applyInternal() + */ + @Override + protected String applyInternal() throws Exception + { + this.retryPaths = new TreeMap(); + + // firstly retrieve all AVM paths and descriptors that we need to process + // execute in a single transaction to retrieve the stateless object list + RetryingTransactionCallback work = new RetryingTransactionCallback() + { + public Void execute() throws Exception + { + long start = System.currentTimeMillis(); + paths = retrieveAVMPaths(); + logger.info("Retrieved: " + paths.size() + " AVM paths in " + (System.currentTimeMillis()-start) + "ms"); + + // also calculate the surf-config reference under the Sites folder while in the txn + surfConfigRef = getSurfConfigNodeRef(siteService.getSiteRoot()); + + // pre-create folders that may cause contention later during multi-threaded batch processing + List folderPath = new ArrayList(); + 
folderPath.add("components"); + FileFolderUtil.makeFolders(fileFolderService, surfConfigRef, folderPath, ContentModel.TYPE_FOLDER); + folderPath.clear(); + folderPath.add("pages"); + folderPath.add("user"); + FileFolderUtil.makeFolders(fileFolderService, surfConfigRef, folderPath, ContentModel.TYPE_FOLDER); + + return null; + } + }; + this.transactionHelper.doInTransaction(work, false, true); + + try + { + // init our cache + this.siteReferenceCache = new ConcurrentHashMap>(16384); + + // TODO: just retrieve a List of AVM NodeDescriptor objects - sort Collection based on Path? + // retrieve AVM NodeDescriptor objects for the paths + final Iterator pathItr = this.paths.keySet().iterator(); + BatchProcessWorkProvider workProvider = new BatchProcessWorkProvider() + { + @Override + public synchronized Collection getNextWork() + { + int batchCount = 0; + + List nodes = new ArrayList(BATCH_SIZE); + while (pathItr.hasNext() && batchCount++ != BATCH_SIZE) + { + nodes.add(paths.get(pathItr.next())); + } + return nodes; + } + + @Override + public synchronized int getTotalEstimatedWorkSize() + { + return paths.size(); + } + }; + + // prepare the batch processor and worker object + BatchProcessor batchProcessor = new BatchProcessor( + "AVMToADMRemoteStorePatch", + this.transactionHelper, + workProvider, + BATCH_THREADS, + BATCH_SIZE, + this.applicationEventPublisher, + logger, + BATCH_SIZE * 10); + + String systemUser = AuthenticationUtil.getSystemUserName(); + final String tenantSystemUser = this.tenantAdminService.getDomainUser( + systemUser, this.tenantAdminService.getCurrentUserDomain()); + BatchProcessWorker worker = new BatchProcessWorker() + { + @Override + public void beforeProcess() throws Throwable + { + AuthenticationUtil.setRunAsUser(tenantSystemUser); + } + + @Override + public void afterProcess() throws Throwable + { + AuthenticationUtil.clearCurrentSecurityContext(); + } + + @Override + public String getIdentifier(AVMNodeDescriptor entry) + { + return 
entry.getPath(); + } + + @Override + public void process(AVMNodeDescriptor entry) throws Throwable + { + migrateNode(entry); + } + }; + + long start = System.currentTimeMillis(); + batchProcessor.process(worker, true); + + // retry the paths that were blocked due to multiple threads attemping to create + // the same folder at the same time - these are dealt with now in a single thread! + if (this.retryPaths.size() != 0) + { + logger.info("Retrying " + this.retryPaths.size() + " paths..."); + work = new RetryingTransactionCallback() + { + public Void execute() throws Exception + { + for (String path : retryPaths.keySet()) + { + migrateNode(retryPaths.get(path)); + } + return null; + } + }; + this.transactionHelper.doInTransaction(work, false, true); + } + + logger.info("Migrated: " + this.paths.size() + " AVM nodes to DM in " + (System.currentTimeMillis()-start) + "ms"); + } + finally + { + // dispose of our cache + this.siteReferenceCache = null; + } + + return I18NUtil.getMessage(MSG_MIGRATION_COMPLETE); + } + + /** + * Migrate a single AVM node. Match, convert and copy the AVM surf config path to + * the new ADM surf-config folder location, creating appropriate sub-folders and + * finally copying the content from the AVM to the DM. + * + * @param avmNode AVMNodeDescriptor + */ + private void migrateNode(final AVMNodeDescriptor avmNode) + { + String path = avmNode.getPath(); + + final boolean debug = logger.isDebugEnabled(); + // what type of path is this? 
+ int index = path.indexOf(this.avmRootPath); + if (index != -1) + { + // crop path removing the early paths we are not interested in + path = path.substring(index + this.avmRootPath.length()); + if (debug) logger.debug("...processing path: " + path); + + // break down the path into its component elements to generate the parent folders + List pathElements = new ArrayList(4); + final StringTokenizer t = new StringTokenizer(path, "/"); + // the remainining path is of the form /[/]/.xml + while (t.hasMoreTokens()) + { + pathElements.add(t.nextToken()); + } + + // match path against generic, user and site + String userId = null; + String siteName = null; + Matcher matcher; + if ((matcher = USER_PATTERN_1.matcher(path)).matches()) + { + userId = URLDecoder.decode(matcher.group(1)); + } + else if ((matcher = USER_PATTERN_2.matcher(path)).matches()) + { + userId = URLDecoder.decode(matcher.group(1)); + } + else if ((matcher = SITE_PATTERN_1.matcher(path)).matches()) + { + siteName = matcher.group(1); + } + else if ((matcher = SITE_PATTERN_2.matcher(path)).matches()) + { + siteName = matcher.group(1); + } + + NodeRef surfConfigRef; + if (siteName != null) + { + if (debug) logger.debug("...resolved site id: " + siteName); + NodeRef siteRef = null; + String key = AlfrescoTransactionSupport.getTransactionId() + siteName; + Pair refCache = siteReferenceCache.get(key); + if (refCache == null) + { + refCache = new Pair(null, null); + siteReferenceCache.put(key, refCache); + } + siteRef = refCache.getFirst(); + if (siteRef == null) + { + siteRef = getSiteNodeRef(siteName); + refCache.setFirst(siteRef); + } + if (siteRef != null) + { + surfConfigRef = refCache.getSecond(); + if (surfConfigRef == null) + { + surfConfigRef = getSurfConfigNodeRef(siteRef); + refCache.setSecond(surfConfigRef); + } + } + else + { + logger.info("WARNING: unable to migrate path as site id cannot be found: " + siteName); + return; + } + } + else if (userId != null) + { + if (debug) 
logger.debug("...resolved user id: " + userId); + surfConfigRef = this.surfConfigRef; + } + else + { + if (debug) logger.debug("...resolved generic path."); + surfConfigRef = this.surfConfigRef; + } + + // ensure folders exist down to the specified parent + NodeRef parentFolder = null; + Pair lastFolderCache = this.lastFolderCache.get(); + String folderKey = (siteName != null) ? siteName + path : path; + if (folderKey.equals(lastFolderCache.getFirst())) + { + // found match to last used folder NodeRef + if (debug) logger.debug("...cache hit - matched last folder reference."); + parentFolder = lastFolderCache.getSecond(); + } + if (parentFolder == null) + { + List folderPath = pathElements.subList(0, pathElements.size() - 1); + try + { + parentFolder = FileFolderUtil.makeFolders( + this.fileFolderService, + surfConfigRef, + folderPath, + ContentModel.TYPE_FOLDER).getNodeRef(); + } + catch (FileExistsException fe) + { + // this occurs if a different thread running a separate txn has created a folder + // that we expected to exist - save a reference to this path to retry it again later + logger.warn("Unable to create folder: " + fe.getName() + " for path: " + avmNode.getPath() + + " - as another txn is busy, will retry later."); + retryPaths.put(avmNode.getPath(), avmNode); + return; + } + // save in last folder cache + lastFolderCache.setFirst(folderKey); + lastFolderCache.setSecond(parentFolder); + } + + if (userId != null) + { + // run as the appropriate user id to execute + final NodeRef parentFolderRef = parentFolder; + AuthenticationUtil.runAs(new RunAsWork() + { + public Void doWork() throws Exception + { + // create new node and perform writer content copy of the content from the AVM to the DM store + FileInfo fileInfo = fileFolderService.create( + parentFolderRef, avmNode.getName(), ContentModel.TYPE_CONTENT); + ContentWriter writer = contentService.getWriter( + fileInfo.getNodeRef(), ContentModel.PROP_CONTENT, true); + 
writer.putContent(avmService.getContentReader(-1, avmNode.getPath())); + return null; + } + }, userId); + } + else + { + // create new node and perform writer content copy of the content from the AVM to the DM store + FileInfo fileInfo = fileFolderService.create( + parentFolder, avmNode.getName(), ContentModel.TYPE_CONTENT); + ContentWriter writer = contentService.getWriter( + fileInfo.getNodeRef(), ContentModel.PROP_CONTENT, true); + writer.putContent(avmService.getContentReader(-1, avmNode.getPath())); + } + } + } + + /** + * @param shortName Site shortname + * + * @return the given Site folder node reference + */ + private NodeRef getSiteNodeRef(String shortName) + { + SiteInfo siteInfo = this.siteService.getSite(shortName); + return siteInfo != null ? siteInfo.getNodeRef() : null; + } + + /** + * Return the "surf-config" noderef under the given root. Create the folder if it + * does not exist yet. + * + * @param rootRef Parent node reference where the "surf-config" folder should be + * + * @return surf-config folder ref + */ + private NodeRef getSurfConfigNodeRef(final NodeRef rootRef) + { + NodeRef surfConfigRef = this.nodeService.getChildByName( + rootRef, ContentModel.ASSOC_CONTAINS, SURF_CONFIG); + if (surfConfigRef == null) + { + if (logger.isDebugEnabled()) + logger.debug("'surf-config' folder not found under current path, creating..."); + QName assocQName = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, SURF_CONFIG); + Map properties = new HashMap(1, 1.0f); + properties.put(ContentModel.PROP_NAME, (Serializable) SURF_CONFIG); + ChildAssociationRef ref = this.nodeService.createNode( + rootRef, ContentModel.ASSOC_CONTAINS, assocQName, ContentModel.TYPE_FOLDER, properties); + surfConfigRef = ref.getChildRef(); + } + return surfConfigRef; + } + + /** + * @return the AVM paths for surf config object in the AVM sitestore + */ + private SortedMap retrieveAVMPaths() throws Exception + { + logger.info("Retrieving paths from AVM store: " + this.avmStore 
+ ":" + this.avmRootPath); + + SortedMap paths = new TreeMap(); + + String avmPath = this.avmStore + ":" + this.avmRootPath; + AVMNodeDescriptor node = this.avmService.lookup(-1, avmPath); + if (node != null) + { + traverseNode(paths, node); + } + + logger.info("Found: " + paths.size() + " AVM files nodes to migrate"); + + return paths; + } + + private void traverseNode(final SortedMap paths, final AVMNodeDescriptor node) + throws IOException + { + final boolean debug = logger.isDebugEnabled(); + final SortedMap listing = this.avmService.getDirectoryListing(node); + for (final AVMNodeDescriptor n : listing.values()) + { + if (n.isFile()) + { + if (debug) logger.debug("...adding path: " + n.getPath()); + paths.put(n.getPath(), n); + } + else if (n.isDirectory()) + { + traverseNode(paths, n); + } + } + } +}