Point checkin.

Added APIs to allow broken links due to deletions & new/modified files in a
workflow to be detected via sync service (differencing).   Not configured
as transactions yet, nor has much testing been done, but things are starting
to shape up.   Now also detecting broken IMG tags (as well as A tags).
Also, the linkvalidation service now has its own file extension excluder bean.
Eventually the old APIs will go away, but I'm leaving them in there for
now to make life simple.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5992 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jon Cox
2007-06-16 04:27:01 +00:00
parent 1bd38892b4
commit e59e2ab976
8 changed files with 237 additions and 79 deletions

View File

@@ -0,0 +1,85 @@
/*-----------------------------------------------------------------------------
* Copyright 2007 Alfresco Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. As a special
* exception to the terms and conditions of version 2.0 of the GPL, you may
* redistribute this Program in connection with Free/Libre and Open Source
* Software ("FLOSS") applications as described in Alfresco's FLOSS exception.
* You should have received a copy of the text describing the FLOSS exception,
* and it is also available here: http://www.alfresco.com/legal/licensing
*
*
* Author Jon Cox <jcox@alfresco.com>
* File HrefDifference.java
*----------------------------------------------------------------------------*/
package org.alfresco.linkvalidation;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Container for the results of differencing the hrefs of two trees.
 * <p>
 * Besides the overall manifest and the link status map, it keeps four
 * manifests that classify the effect of a change: links broken in
 * new/modified files, links broken by deletions, links repaired by
 * removing them (modification or deletion), and links repaired because a
 * new file satisfies a previously broken dependency.
 * <p>
 * All members are created empty by the constructor; this class only hands
 * out references to them and performs no synchronization of its own.
 */
public class HrefDifference
{
    /** Status of links (and possibly dependency info). */
    HrefStatusMap href_status_map_;

    /** Overall manifest of the change. */
    HrefManifest href_manifest_;

    /**
     * Hrefs no longer used by the system anywhere.
     * NOTE(review): the name suggests an href/md5 association, but which
     * side is the key is not visible from this class - confirm with callers.
     */
    HashMap<String,String> obsolete_href_md5_;

    /** Errors in new (or modified) files. */
    HrefManifest broken_in_newmod_;

    /** Errors caused by new deletions. */
    HrefManifest broken_by_del_;

    /** Fixes made by removing links (modification or deletion). */
    HrefManifest repaired_by_delmod_;

    /** Fixes where a new file satisfies a broken dependency. */
    HrefManifest repaired_by_new_;

    /**
     * Creates a difference whose manifests, status map and
     * obsolete-href map are all empty.
     */
    public HrefDifference()
    {
        href_manifest_      = new HrefManifest();
        href_status_map_    = new HrefStatusMap();
        obsolete_href_md5_  = new HashMap<String,String>();

        broken_by_del_      = new HrefManifest();
        broken_in_newmod_   = new HrefManifest();
        repaired_by_delmod_ = new HrefManifest();
        repaired_by_new_    = new HrefManifest();
    }

    /** @return the overall manifest of the change */
    public HrefManifest getHrefManifest() { return href_manifest_; }

    /** @return the status map of the links involved in the change */
    public HrefStatusMap getHrefStatusMap() { return href_status_map_; }

    /**
     * Package-private access to the live map of obsolete hrefs
     * (the internal map itself is returned, not a copy).
     */
    Map<String,String> getObsoleteHrefMd5() { return obsolete_href_md5_; }

    /** @return the manifest of links broken by deletions */
    public HrefManifest getBrokenByDeletionHrefManifest()
    {
        return broken_by_del_;
    }

    /** @return the manifest of broken links found in new/modified files */
    public HrefManifest getBrokenInNewModHrefManifest()
    {
        return broken_in_newmod_;
    }

    /** @return the manifest of links repaired by deletion or modification */
    public HrefManifest getRepairedByDeletionAndModHrefManifest()
    {
        return repaired_by_delmod_;
    }

    /** @return the manifest of links repaired by newly added files */
    public HrefManifest getRepairedByNewHrefManifest()
    {
        return repaired_by_new_;
    }

    /**
     * Misspelled original name, retained so existing callers keep working.
     *
     * @return the manifest of links repaired by newly added files
     * @deprecated use {@link #getRepairedByNewHrefManifest()} instead.
     */
    @Deprecated
    public HrefManifest getRepairedByNewHreManifest()
    {
        return repaired_by_new_;
    }
}

View File

@@ -22,34 +22,33 @@
*
*
* Author Jon Cox <jcox@alfresco.com>
* File HrefManifest.java
* File HrefManifestEntry.java
*----------------------------------------------------------------------------*/
package org.alfresco.linkvalidation;
import java.io.Serializable;
import java.util.List;
import java.util.ArrayList;
/**
* Contains a (possibly filtered) list of the hrefs within a file.
* Common uses of this class are to fetch the links in a web page
* or just the broken ones (i.e.: response status 400-599).
*/
public class HrefManifest implements Serializable
public class HrefManifest
{
static final long serialVersionUID = 6532525229716576911L;
protected List<HrefManifestEntry> manifest_entries_;
protected String file_;
protected List<String> hrefs_;
public HrefManifest( String file,
List<String> hrefs
)
public HrefManifest()
{
file_ = file;
hrefs_ = hrefs;
manifest_entries_ = new ArrayList<HrefManifestEntry>();
}
public String getFileName() { return file_; }
public List<String> getHrefs() { return hrefs_;}
public List<HrefManifestEntry> getManifestEntries() { return manifest_entries_;}
synchronized void add( HrefManifestEntry entry )
{
manifest_entries_.add( entry );
}
}

View File

@@ -22,34 +22,34 @@
*
*
* Author Jon Cox <jcox@alfresco.com>
* File BrokenHrefConcordanceDifference.java
* File HrefManifestEntry.java
*----------------------------------------------------------------------------*/
package org.alfresco.linkvalidation;
package org.alfresco.linkvalidation;
import java.io.Serializable;
import java.util.List;
import java.util.ArrayList;
public class BrokenHrefConcordanceDifference
/**
* Contains a (possibly filtered) list of the hrefs within a file.
* Common uses of this class are to fetch the links in a web page
* or just the broken ones (i.e.: response status 400-599).
*/
public class HrefManifestEntry implements Serializable
{
static final long serialVersionUID = 6532525229716576911L;
List<HrefConcordanceEntry> repaired_;
List<HrefConcordanceEntry> newly_broken_;
protected String file_;
protected List<String> hrefs_;
public BrokenHrefConcordanceDifference()
public HrefManifestEntry( String file,
List<String> hrefs
)
{
repaired_ = new ArrayList<HrefConcordanceEntry>();
newly_broken_ = new ArrayList<HrefConcordanceEntry>();
file_ = file;
hrefs_ = hrefs;
}
public List<HrefConcordanceEntry> getRepairedHrefConcordanceEntries()
{
return repaired_;
}
public List<HrefConcordanceEntry> getNewlyBrokenHrefConcordanceEntries( )
{
return newly_broken_;
}
public String getFileName() { return file_; }
public List<String> getHrefs() { return hrefs_;}
}

View File

@@ -0,0 +1,70 @@
/*-----------------------------------------------------------------------------
* Copyright 2007 Alfresco Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. As a special
* exception to the terms and conditions of version 2.0 of the GPL, you may
* redistribute this Program in connection with Free/Libre and Open Source
* Software ("FLOSS") applications as described in Alfresco's FLOSS exception.
* You should have received a copy of the text describing the FLOSS exception,
* and it is also available here: http://www.alfresco.com/legal/licensing
*
*
* Author Jon Cox <jcox@alfresco.com>
* File HrefStatusMap.java
*----------------------------------------------------------------------------*/
package org.alfresco.linkvalidation;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import org.alfresco.util.Pair;
/**
 * Wrapper around the ephemeral cache of href status results.
 * <p>
 * Each key is a url; each value is a pair made up of the url's status
 * code and the list of files accessed when the url is requested, if known.
 * <p>
 * {@link #put} and {@link #get} are synchronized on this instance.
 * The wrapped map can also be extracted via {@code getStatusMap()};
 * access through that reference is not synchronized by this class.
 */
public class HrefStatusMap
{
    /** The wrapped url-to-status map. */
    Map< String, Pair<Integer,List<String>>> status_;

    /**
     * Creates a wrapper around a new, empty {@link HashMap}.
     */
    public HrefStatusMap()
    {
        this( new HashMap<String,Pair<Integer,List<String>>>() );
    }

    /**
     * Creates a wrapper around the supplied map.  The reference is stored
     * as-is (no copy is made).
     *
     * @param status the map to wrap
     */
    public HrefStatusMap( Map<String,Pair<Integer,List<String>>> status )
    {
        this.status_ = status;
    }

    /**
     * Records the status of a url.
     *
     * @param url    the url acting as key
     * @param status the pair: status code, file dependency list
     */
    public synchronized void put( String url, Pair<Integer,List<String>> status)
    {
        this.status_.put( url, status );
    }

    /**
     * Looks up the status recorded for a url.
     *
     * @param url the url acting as key
     * @return whatever the wrapped map holds for {@code url}
     */
    public synchronized Pair<Integer,List<String>> get( String url)
    {
        final Pair<Integer,List<String>> entry = this.status_.get( url );
        return entry;
    }

    /**
     * Package-private extraction of the wrapped map itself (not a copy).
     */
    Map< String, Pair<Integer,List<String>>> getStatusMap()
    {
        return this.status_;
    }
}

View File

@@ -116,7 +116,7 @@ public class LinkValidationAction extends ActionExecuterAbstractBase
this.linkValidationService.updateHrefInfo(store, false, 10000, 30000, 5, monitor);
// retrieve the manifest of all the broken links and files
List<HrefManifest> manifests = this.linkValidationService.getBrokenHrefManifests(store);
List<HrefManifestEntry> manifests = this.linkValidationService.getBrokenHrefManifestEntries(store);
// create the report object using the link check results
report = new LinkValidationReport(monitor, manifests);

View File

@@ -51,7 +51,7 @@ public class LinkValidationReport implements Serializable
private Throwable error;
private List<String> brokenFiles;
private Map<String, HrefManifest> brokenLinksByFile;
private Map<String, HrefManifestEntry> brokenLinksByFile;
private static final long serialVersionUID = 7562964706845609991L;
@@ -62,7 +62,7 @@ public class LinkValidationReport implements Serializable
* of files containing broken links
* @param manifests The manifest of broken links and files
*/
public LinkValidationReport(HrefValidationProgress status, List<HrefManifest> manifests)
public LinkValidationReport(HrefValidationProgress status, List<HrefManifestEntry> manifests)
{
this.numberFilesChecked = status.getFileUpdateCount();
this.numberLinksChecked = status.getUrlUpdateCount();
@@ -71,10 +71,10 @@ public class LinkValidationReport implements Serializable
this.brokenFiles = new ArrayList<String>(manifests.size());
// create a map of broken links by file.
this.brokenLinksByFile = new HashMap<String, HrefManifest>(manifests.size());
this.brokenLinksByFile = new HashMap<String, HrefManifestEntry>(manifests.size());
// build the required list and maps
for (HrefManifest manifest : manifests)
for (HrefManifestEntry manifest : manifests)
{
String fileName = manifest.getFileName();
this.brokenFiles.add(fileName);
@@ -125,7 +125,7 @@ public class LinkValidationReport implements Serializable
{
List<String> links = null;
HrefManifest manifest = this.brokenLinksByFile.get(file);
HrefManifestEntry manifest = this.brokenLinksByFile.get(file);
if (manifest != null)
{
links = manifest.getHrefs();

View File

@@ -137,50 +137,25 @@ public interface LinkValidationService
/**
* This function is just a convenience wrapper for calling
* getHrefManifests with statusGTE=400 and statusLTE=599.
* getHrefManifestEntries with statusGTE=400 and statusLTE=599.
*/
public List<HrefManifest> getBrokenHrefManifests(
String storeNameOrWebappPath
) throws AVMNotFoundException;
public List<HrefManifestEntry> getBrokenHrefManifestEntries(
String storeNameOrWebappPath
) throws AVMNotFoundException;
/**
* Returns a manifest consisting of just the broken hrefs
* within each file containing one or more broken href.
* The HrefManifest list is sorted in increasing lexicographic
* order by file name. The hrefs within each HrefManifest
* The HrefManifestEntry list is sorted in increasing lexicographic
* order by file name. The hrefs within each HrefManifestEntry
* are also sorted in increasing lexicographic order.
*/
public List<HrefManifest> getHrefManifests(
public List<HrefManifestEntry> getHrefManifestEntries(
String storeNameOrWebappPath,
int statusGTE,
int statusLTE) throws
AVMNotFoundException;
/**
* This function is just a convenience wrapper for calling
* getHrefManifest with statusGTE=400 and statusLTE=599.
* <p>
* Note: If you want to get the broken links in every file in
* a webapp or store, it's much more efficient to use
* getBrokenHrefManifests instead of this function.
*/
public HrefManifest getBrokenHrefManifest( String path)
throws AVMNotFoundException;
/**
* Returns a manifest of all the hrefs within the file specified by 'path'
* whose response status is greater than or equal to statusGTE,
* and less than or equal to statusLTE.
* <p>
* Note: If you want to get a list of manifests of every file in a
* webapp or store, it's much more efficient to use getHrefManifests
* instead of this function.
*/
public HrefManifest getHrefManifest( String path,
int statusGTE,
int statusLTE) throws
AVMNotFoundException;
/**
* Fetch all hyperlinks that rely upon the existence of the file specified
@@ -203,16 +178,25 @@ public interface LinkValidationService
*/
public List<String> getHrefsDependentUponFile(String path);
public HrefDifference getHrefDifference(
String srcWebappPath,
String dstWebappPath,
int connectTimeout,
int readTimeout,
int nthreads,
HrefValidationProgress progress
) throws AVMNotFoundException;
/**
* Don't use yet - does nothing at the moment.
*/
public BrokenHrefConcordanceDifference getBrokenHrefConcordanceDifference(
int srcVersion,
String srcPath,
int dstVersion,
String dstPath,
HrefValidationProgress progress)
public HrefDifference getHrefDifference( int srcVersion,
String srcWebappPath,
int dstVersion,
String dstWebappPath,
int connectTimeout,
int readTimeout,
int nthreads,
HrefValidationProgress progress)
throws AVMNotFoundException;
}