Initial commit for ALF-10419:

- Bulk Filesystem Importer
- Adapted from work by Peter Monks (see http://code.google.com/p/alfresco-bulk-filesystem-import) and Romain Guinot
- Refactored to limit repeated code
- Refactored to use the Bulk Processor
- Currently limited to one bulk import at a time (enforced by lock service)
- Unit tests added
- Some fixes applied
- Performance tests; tried different filesystem "walking" strategies
- Still to do: yui dependencies - are these necessary?

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@31100 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Steven Glover
2011-10-10 18:45:00 +00:00
parent 0866d3eb0b
commit 87967a9447
18 changed files with 1704 additions and 0 deletions

View File

@@ -0,0 +1,213 @@
/*
* Copyright (C) 2005-2011 Alfresco Software Limited.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have received a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing"
*/
package org.alfresco.repo.web.scripts.bulkimport;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Date;
import org.alfresco.repo.model.Repository;
import org.alfresco.service.cmr.model.FileFolderService;
import org.alfresco.service.cmr.model.FileInfo;
import org.alfresco.service.cmr.model.FileNotFoundException;
import org.alfresco.service.cmr.repository.NodeRef;
import org.springframework.extensions.webscripts.DeclarativeWebScript;
import org.springframework.extensions.webscripts.WebScriptException;
/**
 * Base class for the bulk filesystem import web scripts: holds the parameter names they share,
 * the injected repository services, and helpers for resolving the import target and for
 * rendering a failure as plain text.
 */
public class AbstractBulkFileSystemImportWebScript extends DeclarativeWebScript
{
    /** URI (relative to the service context path) of the import status web script. */
    protected static final String WEB_SCRIPT_URI_BULK_FILESYSTEM_IMPORT_STATUS = "/bulk/import/filesystem/status";

    protected static final String PARAMETER_TARGET_NODEREF = "targetNodeRef";
    protected static final String PARAMETER_TARGET_PATH = "targetPath";

    protected static final String COMPANY_HOME_NAME = "Company Home";
    protected static final String COMPANY_HOME_PATH = "/" + COMPANY_HOME_NAME;

    // Web scripts parameters (common)
    protected static final String PARAMETER_REPLACE_EXISTING = "replaceExisting";
    protected static final String PARAMETER_VALUE_REPLACE_EXISTING = "replaceExisting";
    protected static final String PARAMETER_SOURCE_DIRECTORY = "sourceDirectory";

    protected static final String IMPORT_ALREADY_IN_PROGRESS_MODEL_KEY = "importInProgress";
    protected static final String IMPORT_ALREADY_IN_PROGRESS_ERROR_KEY ="bfsit.error.importAlreadyInProgress";

    protected static final String PARAMETER_BATCH_SIZE = "batchSize";
    protected static final String PARAMETER_NUM_THREADS = "numThreads";

    protected FileFolderService fileFolderService;
    protected Repository repository;

    // Not read or written anywhere in this class; kept because it is visible to subclasses.
    protected volatile boolean importInProgress;

    /**
     * Determines the node an import should target. A non-blank node reference string takes
     * precedence; the path is only consulted when no node reference was supplied.
     *
     * @param targetNodeRefStr the node reference in string form (may be null or blank)
     * @param targetPath       the repository path (may be null or blank)
     * @return the target node
     * @throws WebScriptException    if both arguments are null or blank, or the path cannot be
     *                               mapped to a node
     * @throws FileNotFoundException propagated from the path lookup
     */
    protected NodeRef getTargetNodeRef(String targetNodeRefStr, String targetPath) throws FileNotFoundException
    {
        if (!isBlank(targetNodeRefStr))
        {
            return new NodeRef(targetNodeRefStr.trim());
        }

        if (isBlank(targetPath))
        {
            throw new WebScriptException("Error: neither parameter '" + PARAMETER_TARGET_NODEREF +
                                         "' nor parameter '" + PARAMETER_TARGET_PATH +
                                         "' was provided, but at least one is required !");
        }

        return convertPathToNodeRef(targetPath.trim());
    }

    /**
     * Resolves a '/'-separated path to a node, relative to Company Home. Repeated slashes are
     * collapsed, and an optional leading "/Company Home" segment plus leading/trailing slashes
     * are ignored; an empty remainder denotes Company Home itself.
     *
     * @param targetPath the path to resolve (must not be null)
     * @return the node the path refers to
     * @throws WebScriptException    if the lookup yields no result
     * @throws FileNotFoundException propagated from {@code FileFolderService.resolveNamePath}
     */
    protected NodeRef convertPathToNodeRef(String targetPath) throws FileNotFoundException
    {
        NodeRef companyHome = repository.getCompanyHome();
        String cleanTargetPath = targetPath.replaceAll("/+", "/");

        // Only strip the prefix when it is a complete path segment; a plain startsWith() would
        // also match e.g. "/Company Homepage/x" and corrupt it into "page/x".
        if (cleanTargetPath.equals(COMPANY_HOME_PATH) || cleanTargetPath.startsWith(COMPANY_HOME_PATH + "/"))
        {
            cleanTargetPath = cleanTargetPath.substring(COMPANY_HOME_PATH.length());
        }

        if (cleanTargetPath.startsWith("/"))
        {
            cleanTargetPath = cleanTargetPath.substring(1);
        }

        if (cleanTargetPath.endsWith("/"))
        {
            cleanTargetPath = cleanTargetPath.substring(0, cleanTargetPath.length() - 1);
        }

        if (cleanTargetPath.length() == 0)
        {
            return companyHome;
        }

        FileInfo info = fileFolderService.resolveNamePath(companyHome, Arrays.asList(cleanTargetPath.split("/")));
        if (info == null)
        {
            throw new WebScriptException("could not determine NodeRef for path :'"+cleanTargetPath+"'");
        }

        return info.getNodeRef();
    }

    /**
     * Builds a plain-text failure report: time of failure, local host name and address, and the
     * exception chain (innermost cause first) with stack traces.
     *
     * @param t the failure to describe (may be null)
     * @return the report text
     */
    protected String buildTextMessage(Throwable t)
    {
        StringBuilder result = new StringBuilder();
        String timeOfFailure = (new Date()).toString();
        String hostName;
        String ipAddress;

        try
        {
            // Look the local host up once and reuse it for both values.
            InetAddress localHost = InetAddress.getLocalHost();
            hostName = localHost.getHostName();
            ipAddress = localHost.getHostAddress();
        }
        catch (UnknownHostException uhe)
        {
            // The host details are informational only, so fall back to a placeholder.
            hostName = "unknown";
            ipAddress = "unknown";
        }

        result.append("\nTime of failure: ").append(timeOfFailure);
        result.append("\nHost where failure occurred: ").append(hostName).append(" (").append(ipAddress).append(")");

        if (t != null)
        {
            result.append("\nRoot exception:");
            appendExceptionStack(result, t);
        }
        else
        {
            result.append("\nNo exception was provided.");
        }

        return result.toString();
    }

    /**
     * Appends the given throwable to the buffer, preceded (recursively) by its cause so that the
     * innermost exception is printed first and each wrapper follows a "Wrapped by:" marker.
     */
    private void appendExceptionStack(StringBuilder out, Throwable t)
    {
        if (t == null)
        {
            return;
        }

        Throwable cause = t.getCause();
        if (cause != null)
        {
            appendExceptionStack(out, cause);
            out.append("\nWrapped by:");
        }

        String message = t.getMessage();
        out.append("\n");
        out.append(t.getClass().getName());
        out.append(": ");
        out.append(message == null ? "" : message);
        out.append("\n");
        appendStackTraceElements(out, t.getStackTrace());
    }

    /** Appends one "\tat ..." line per stack frame; a null array appends nothing. */
    private void appendStackTraceElements(StringBuilder out, StackTraceElement[] elements)
    {
        if (elements == null)
        {
            return;
        }

        for (StackTraceElement element : elements)
        {
            out.append("\tat ").append(element.toString()).append("\n");
        }
    }

    /** Returns true when the value is null or contains nothing but whitespace. */
    private static boolean isBlank(String value)
    {
        return value == null || value.trim().length() == 0;
    }

    // boilerplate setters

    public void setFileFolderService(FileFolderService fileFolderService)
    {
        this.fileFolderService = fileFolderService;
    }

    public void setRepository(Repository repository)
    {
        this.repository = repository;
    }
}

View File

@@ -0,0 +1,73 @@
/*
* Copyright (C) 2005-2011 Alfresco Software Limited.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have received a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing"
*/
package org.alfresco.repo.web.scripts.bulkimport;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.repo.bulkimport.BulkFilesystemImporter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.extensions.webscripts.Cache;
import org.springframework.extensions.webscripts.DeclarativeWebScript;
import org.springframework.extensions.webscripts.Status;
import org.springframework.extensions.webscripts.WebScriptRequest;
/**
 * Web script that reports the current status of the bulk filesystem importer.
 *
 * @since 4.0
 */
public class BulkFilesystemImportStatusWebScript extends DeclarativeWebScript
{
    private final static Log logger = LogFactory.getLog(BulkFilesystemImportStatusWebScript.class);

    /** Model key (output parameter for Freemarker) under which the status object is stored. */
    private final static String RESULT_IMPORT_STATUS = "importStatus";

    /** Importer whose status is reported; injected through {@link #setBulkImporter}. */
    private BulkFilesystemImporter bulkImporter;

    public void setBulkImporter(BulkFilesystemImporter bulkImporter)
    {
        this.bulkImporter = bulkImporter;
    }

    /**
     * Marks the response as never cacheable and returns a model holding the importer's
     * current status under {@code "importStatus"}.
     */
    @Override
    protected Map<String, Object> executeImpl(WebScriptRequest request, Status status, Cache cache)
    {
        cache.setNeverCache(true);

        final Map<String, Object> model = new HashMap<String, Object>();
        model.put(RESULT_IMPORT_STATUS, bulkImporter.getStatus());

        return model;
    }
}

View File

@@ -0,0 +1,194 @@
/*
* Copyright (C) 2005-2011 Alfresco Software Limited.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have received a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing"
*/
package org.alfresco.repo.web.scripts.bulkimport.copy;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.repo.bulkimport.BulkImportParameters;
import org.alfresco.repo.bulkimport.NodeImporter;
import org.alfresco.repo.bulkimport.impl.MultiThreadedBulkFilesystemImporter;
import org.alfresco.repo.bulkimport.impl.StreamingNodeImporterFactory;
import org.alfresco.repo.web.scripts.bulkimport.AbstractBulkFileSystemImportWebScript;
import org.alfresco.service.cmr.model.FileNotFoundException;
import org.alfresco.service.cmr.repository.NodeRef;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.extensions.surf.util.I18NUtil;
import org.springframework.extensions.webscripts.Cache;
import org.springframework.extensions.webscripts.Status;
import org.springframework.extensions.webscripts.WebScriptException;
import org.springframework.extensions.webscripts.WebScriptRequest;
/**
* Web Script class that invokes a BulkFilesystemImporter implementation.
*
* @since 4.0
*/
public class BulkFilesystemImportWebScript extends AbstractBulkFileSystemImportWebScript
{
private final static Log logger = LogFactory.getLog(BulkFilesystemImportWebScript.class);
private MultiThreadedBulkFilesystemImporter bulkImporter;
private StreamingNodeImporterFactory nodeImporterFactory;
public void setBulkImporter(MultiThreadedBulkFilesystemImporter bulkImporter)
{
this.bulkImporter = bulkImporter;
}
public void setNodeImporterFactory(StreamingNodeImporterFactory nodeImporterFactory)
{
this.nodeImporterFactory = nodeImporterFactory;
}
/**
* @see org.springframework.extensions.webscripts.DeclarativeWebScript#executeImpl(org.springframework.extensions.webscripts.WebScriptRequest, org.springframework.extensions.webscripts.Status, org.springframework.extensions.webscripts.Cache)
*/
@Override
protected Map<String, Object> executeImpl(final WebScriptRequest request, final Status status, final Cache cache)
{
Map<String, Object> model = new HashMap<String, Object>();
String targetNodeRefStr = null;
String targetPath = null;
String sourceDirectoryStr = null;
String replaceExistingStr = null;
String batchSizeStr = null;
String numThreadsStr = null;
cache.setNeverCache(true);
try
{
if(!bulkImporter.getStatus().inProgress())
{
NodeRef targetNodeRef = null;
File sourceDirectory = null;
boolean replaceExisting = false;
int batchSize = bulkImporter.getDefaultBatchSize();
int numThreads = bulkImporter.getDefaultNumThreads();
// Retrieve, validate and convert parameters
targetNodeRefStr = request.getParameter(PARAMETER_TARGET_NODEREF);
targetPath = request.getParameter(PARAMETER_TARGET_PATH);
sourceDirectoryStr = request.getParameter(PARAMETER_SOURCE_DIRECTORY);
replaceExistingStr = request.getParameter(PARAMETER_REPLACE_EXISTING);
batchSizeStr = request.getParameter(PARAMETER_BATCH_SIZE);
numThreadsStr = request.getParameter(PARAMETER_NUM_THREADS);
targetNodeRef = getTargetNodeRef(targetNodeRefStr, targetPath);
if (sourceDirectoryStr == null || sourceDirectoryStr.trim().length() == 0)
{
throw new RuntimeException("Error: mandatory parameter '" + PARAMETER_SOURCE_DIRECTORY + "' was not provided.");
}
sourceDirectory = new File(sourceDirectoryStr.trim());
if (replaceExistingStr != null && replaceExistingStr.trim().length() > 0)
{
replaceExisting = PARAMETER_VALUE_REPLACE_EXISTING.equals(replaceExistingStr);
}
// Initiate the import
NodeImporter nodeImporter = nodeImporterFactory.getNodeImporter(sourceDirectory);
//bulkImporter.asyncBulkImport(targetNodeRef, nodeImporter, replaceExisting);
BulkImportParameters bulkImportParameters = new BulkImportParameters();
if (numThreadsStr != null && numThreadsStr.trim().length() > 0)
{
try
{
numThreads = Integer.parseInt(numThreadsStr);
if(numThreads < 1)
{
throw new RuntimeException("Error: parameter '" + PARAMETER_NUM_THREADS + "' must be an integer > 0.");
}
bulkImportParameters.setNumThreads(numThreads);
}
catch(NumberFormatException e)
{
throw new RuntimeException("Error: parameter '" + PARAMETER_NUM_THREADS + "' must be an integer > 0.");
}
}
if (batchSizeStr != null && batchSizeStr.trim().length() > 0)
{
try
{
batchSize = Integer.parseInt(batchSizeStr);
if(batchSize < 1)
{
throw new RuntimeException("Error: parameter '" + PARAMETER_BATCH_SIZE + "' must be an integer > 0.");
}
bulkImportParameters.setBatchSize(batchSize);
}
catch(NumberFormatException e)
{
throw new RuntimeException("Error: parameter '" + PARAMETER_BATCH_SIZE + "' must be an integer > 0.");
}
}
bulkImportParameters.setReplaceExisting(replaceExisting);
bulkImportParameters.setTarget(targetNodeRef);
bulkImporter.asyncBulkImport(bulkImportParameters, nodeImporter);
// redirect to the status Web Script
status.setCode(Status.STATUS_MOVED_TEMPORARILY);
status.setRedirect(true);
status.setLocation(request.getServiceContextPath() + WEB_SCRIPT_URI_BULK_FILESYSTEM_IMPORT_STATUS);
}
else
{
model.put(IMPORT_ALREADY_IN_PROGRESS_MODEL_KEY, I18NUtil.getMessage(IMPORT_ALREADY_IN_PROGRESS_ERROR_KEY));
}
}
catch (WebScriptException wse)
{
status.setCode(Status.STATUS_BAD_REQUEST, wse.getMessage());
status.setRedirect(true);
}
catch (FileNotFoundException fnfe)
{
status.setCode(Status.STATUS_BAD_REQUEST,"The repository path '" + targetPath + "' does not exist !");
status.setRedirect(true);
}
catch(IllegalArgumentException iae)
{
status.setCode(Status.STATUS_BAD_REQUEST,iae.getMessage());
status.setRedirect(true);
}
catch (Throwable t)
{
throw new WebScriptException(Status.STATUS_INTERNAL_SERVER_ERROR, buildTextMessage(t), t);
}
return model;
}
}