From e59e2ab97617647fcb42da54a652967c1a574d79 Mon Sep 17 00:00:00 2001 From: Jon Cox Date: Sat, 16 Jun 2007 04:27:01 +0000 Subject: [PATCH] Point checkin. Added APIs to allow broken links due to deletions & new/modified files in a workflow to be detected via sync service (differencing). Not configured as transactions yet, nor has much testing been done, but things are starting to shape up. Now also detecting broken IMG tags (as well as A tags). Also, the linkvalidation service now has its own file extension excluder bean. Eventually the old APIs will go away, but I'm leaving them in there for now to make life simple. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5992 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../linkvalidation-service-context.xml | 20 +++++ .../linkvalidation/HrefDifference.java | 85 +++++++++++++++++++ .../alfresco/linkvalidation/HrefManifest.java | 25 +++--- ...Difference.java => HrefManifestEntry.java} | 38 ++++----- .../linkvalidation/HrefStatusMap.java | 70 +++++++++++++++ .../linkvalidation/LinkValidationAction.java | 2 +- .../linkvalidation/LinkValidationReport.java | 10 +-- .../linkvalidation/LinkValidationService.java | 66 ++++++-------- 8 files changed, 237 insertions(+), 79 deletions(-) create mode 100644 source/java/org/alfresco/linkvalidation/HrefDifference.java rename source/java/org/alfresco/linkvalidation/{BrokenHrefConcordanceDifference.java => HrefManifestEntry.java} (62%) create mode 100644 source/java/org/alfresco/linkvalidation/HrefStatusMap.java diff --git a/config/alfresco/linkvalidation-service-context.xml b/config/alfresco/linkvalidation-service-context.xml index 7507113748..57d96a377a 100644 --- a/config/alfresco/linkvalidation-service-context.xml +++ b/config/alfresco/linkvalidation-service-context.xml @@ -3,6 +3,20 @@ "http://www.springframework.org/dtd/spring-beans.dtd"> + + + + + .o + .bak + .tmp + .swp + ~ + + + + @@ -15,5 +29,11 @@ + + + + + + diff --git 
a/source/java/org/alfresco/linkvalidation/HrefDifference.java b/source/java/org/alfresco/linkvalidation/HrefDifference.java new file mode 100644 index 0000000000..f38fb98dd5 --- /dev/null +++ b/source/java/org/alfresco/linkvalidation/HrefDifference.java @@ -0,0 +1,85 @@ +/*----------------------------------------------------------------------------- +* Copyright 2007 Alfresco Inc. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. As a special +* exception to the terms and conditions of version 2.0 of the GPL, you may +* redistribute this Program in connection with Free/Libre and Open Source +* Software ("FLOSS") applications as described in Alfresco's FLOSS exception. 
+* You should have received a copy of the text describing the FLOSS exception, +* and it is also available here: http://www.alfresco.com/legal/licensing +* +* +* Author Jon Cox +* File HrefDifference.java +*----------------------------------------------------------------------------*/ + +package org.alfresco.linkvalidation; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class HrefDifference +{ + HrefStatusMap href_status_map_; //status of links + maybe dep info + HrefManifest href_manifest_; // overall manifest of change + + // Hrefs no longer used by the system anywhere + HashMap obsolete_href_md5_; + + HrefManifest broken_in_newmod_; // errors in new files + HrefManifest broken_by_del_; // errors caused by new deletions + HrefManifest repaired_by_delmod_; // fix by removing links (mod or del) + HrefManifest repaired_by_new_; // new file satisfies broken dep + + + public HrefDifference() + { + href_manifest_ = new HrefManifest(); + href_status_map_ = new HrefStatusMap(); + obsolete_href_md5_ = new HashMap(); + + broken_by_del_ = new HrefManifest(); + broken_in_newmod_ = new HrefManifest(); + repaired_by_delmod_ = new HrefManifest(); + repaired_by_new_ = new HrefManifest(); + } + + public HrefManifest getHrefManifest() { return href_manifest_; } + public HrefStatusMap getHrefStatusMap() { return href_status_map_; } + Map getObsoleteHrefMd5() { return obsolete_href_md5_; } + + public HrefManifest getBrokenByDeletionHrefManifest( ) + { + return broken_by_del_; + } + + public HrefManifest getBrokenInNewModHrefManifest() + { + return broken_in_newmod_; + } + + public HrefManifest getRepairedByDeletionAndModHrefManifest() + { + return repaired_by_delmod_; + } + + public HrefManifest getRepairedByNewHreManifest() + { + return repaired_by_new_; + } +} + diff --git a/source/java/org/alfresco/linkvalidation/HrefManifest.java b/source/java/org/alfresco/linkvalidation/HrefManifest.java index 
f1495c49d0..ab4f1f1049 100644 --- a/source/java/org/alfresco/linkvalidation/HrefManifest.java +++ b/source/java/org/alfresco/linkvalidation/HrefManifest.java @@ -22,34 +22,33 @@ * * * Author Jon Cox -* File HrefManifest.java +* File HrefManifestEntry.java *----------------------------------------------------------------------------*/ package org.alfresco.linkvalidation; import java.io.Serializable; import java.util.List; +import java.util.ArrayList; /** * Contains a (possibly filtered) list of the hrefs within a file. * Common uses of this class are to fetch the links in a web page * or just the broken ones (i.e.: response status 400-599). */ -public class HrefManifest implements Serializable +public class HrefManifest { - static final long serialVersionUID = 6532525229716576911L; + protected List manifest_entries_; - protected String file_; - protected List hrefs_; - - public HrefManifest( String file, - List hrefs - ) + public HrefManifest() { - file_ = file; - hrefs_ = hrefs; + manifest_entries_ = new ArrayList(); } - public String getFileName() { return file_; } - public List getHrefs() { return hrefs_;} + public List getManifestEntries() { return manifest_entries_;} + + synchronized void add( HrefManifestEntry entry ) + { + manifest_entries_.add( entry ); + } } diff --git a/source/java/org/alfresco/linkvalidation/BrokenHrefConcordanceDifference.java b/source/java/org/alfresco/linkvalidation/HrefManifestEntry.java similarity index 62% rename from source/java/org/alfresco/linkvalidation/BrokenHrefConcordanceDifference.java rename to source/java/org/alfresco/linkvalidation/HrefManifestEntry.java index 980f83a373..5532643f01 100644 --- a/source/java/org/alfresco/linkvalidation/BrokenHrefConcordanceDifference.java +++ b/source/java/org/alfresco/linkvalidation/HrefManifestEntry.java @@ -22,34 +22,34 @@ * * * Author Jon Cox -* File BrokenHrefConcordanceDifference.java +* File HrefManifestEntry.java 
*----------------------------------------------------------------------------*/ -package org.alfresco.linkvalidation; +package org.alfresco.linkvalidation; +import java.io.Serializable; import java.util.List; -import java.util.ArrayList; -public class BrokenHrefConcordanceDifference +/** +* Contains a (possibly filtered) list of the hrefs within a file. +* Common uses of this class are to fetch the links in a web page +* or just the broken ones (i.e.: response status 400-599). +*/ +public class HrefManifestEntry implements Serializable { + static final long serialVersionUID = 6532525229716576911L; - List repaired_; - List newly_broken_; + protected String file_; + protected List hrefs_; - public BrokenHrefConcordanceDifference() + public HrefManifestEntry( String file, + List hrefs + ) { - repaired_ = new ArrayList(); - newly_broken_ = new ArrayList(); + file_ = file; + hrefs_ = hrefs; } - public List getRepairedHrefConcordanceEntries() - { - return repaired_; - } - - public List getNewlyBrokenHrefConcordanceEntries( ) - { - return newly_broken_; - } + public String getFileName() { return file_; } + public List getHrefs() { return hrefs_;} } - diff --git a/source/java/org/alfresco/linkvalidation/HrefStatusMap.java b/source/java/org/alfresco/linkvalidation/HrefStatusMap.java new file mode 100644 index 0000000000..f8e32f078c --- /dev/null +++ b/source/java/org/alfresco/linkvalidation/HrefStatusMap.java @@ -0,0 +1,70 @@ +/*----------------------------------------------------------------------------- +* Copyright 2007 Alfresco Inc. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. 
+* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. As a special +* exception to the terms and conditions of version 2.0 of the GPL, you may +* redistribute this Program in connection with Free/Libre and Open Source +* Software ("FLOSS") applications as described in Alfresco's FLOSS exception. +* You should have received a copy of the text describing the FLOSS exception, +* and it is also available here: http://www.alfresco.com/legal/licensing +* +* +* Author Jon Cox +* File HrefStatusMap.java +*----------------------------------------------------------------------------*/ + +package org.alfresco.linkvalidation; + +import java.util.HashMap; +import java.util.Map; +import java.util.List; +import org.alfresco.util.Pair; + +/** +* A synchronized wrapper for the ephemeral cache of href status results. +* The key is a url, the value is a pair consisting of the url's status code +* and the list of files accessed when the URL is requested, if known. +* +* This class also allows the non-synchronized map it wraps to be extracted. 
+*/ +public class HrefStatusMap +{ + Map< String, Pair>> status_; + + public HrefStatusMap() + { + status_ = new HashMap>>(); + } + + public HrefStatusMap( Map>> status ) + { status_ = status; } + + + /** + * Takes the url and the Pair: status code, file dependency list + */ + public synchronized void put( String url, Pair> status) + { + status_.put( url, status ); + } + + public synchronized Pair> get( String url) + { + return status_.get( url ); + } + + Map< String, Pair>> getStatusMap() { return status_;} +} + diff --git a/source/java/org/alfresco/linkvalidation/LinkValidationAction.java b/source/java/org/alfresco/linkvalidation/LinkValidationAction.java index 422e8af1ab..e27485810b 100644 --- a/source/java/org/alfresco/linkvalidation/LinkValidationAction.java +++ b/source/java/org/alfresco/linkvalidation/LinkValidationAction.java @@ -116,7 +116,7 @@ public class LinkValidationAction extends ActionExecuterAbstractBase this.linkValidationService.updateHrefInfo(store, false, 10000, 30000, 5, monitor); // retrieve the manifest of all the broken links and files - List manifests = this.linkValidationService.getBrokenHrefManifests(store); + List manifests = this.linkValidationService.getBrokenHrefManifestEntries(store); // create the report object using the link check results report = new LinkValidationReport(monitor, manifests); diff --git a/source/java/org/alfresco/linkvalidation/LinkValidationReport.java b/source/java/org/alfresco/linkvalidation/LinkValidationReport.java index cd62979d89..3b18c8521f 100755 --- a/source/java/org/alfresco/linkvalidation/LinkValidationReport.java +++ b/source/java/org/alfresco/linkvalidation/LinkValidationReport.java @@ -51,7 +51,7 @@ public class LinkValidationReport implements Serializable private Throwable error; private List brokenFiles; - private Map brokenLinksByFile; + private Map brokenLinksByFile; private static final long serialVersionUID = 7562964706845609991L; @@ -62,7 +62,7 @@ public class LinkValidationReport implements 
Serializable * of files containing broken links * @param manifests The manifest of broken links and files */ - public LinkValidationReport(HrefValidationProgress status, List manifests) + public LinkValidationReport(HrefValidationProgress status, List manifests) { this.numberFilesChecked = status.getFileUpdateCount(); this.numberLinksChecked = status.getUrlUpdateCount(); @@ -71,10 +71,10 @@ public class LinkValidationReport implements Serializable this.brokenFiles = new ArrayList(manifests.size()); // create a map of broken links by file. - this.brokenLinksByFile = new HashMap(manifests.size()); + this.brokenLinksByFile = new HashMap(manifests.size()); // build the required list and maps - for (HrefManifest manifest : manifests) + for (HrefManifestEntry manifest : manifests) { String fileName = manifest.getFileName(); this.brokenFiles.add(fileName); @@ -125,7 +125,7 @@ public class LinkValidationReport implements Serializable { List links = null; - HrefManifest manifest = this.brokenLinksByFile.get(file); + HrefManifestEntry manifest = this.brokenLinksByFile.get(file); if (manifest != null) { links = manifest.getHrefs(); diff --git a/source/java/org/alfresco/linkvalidation/LinkValidationService.java b/source/java/org/alfresco/linkvalidation/LinkValidationService.java index a428ff8b7c..1933a13a8c 100644 --- a/source/java/org/alfresco/linkvalidation/LinkValidationService.java +++ b/source/java/org/alfresco/linkvalidation/LinkValidationService.java @@ -137,50 +137,25 @@ public interface LinkValidationService /** * This function is just a convenience wrapper for calling - * getHrefManifests with statusGTE=400 and statusLTE=599. + * getHrefManifestEntries with statusGTE=400 and statusLTE=599. 
*/ - public List getBrokenHrefManifests( - String storeNameOrWebappPath - ) throws AVMNotFoundException; + public List getBrokenHrefManifestEntries( + String storeNameOrWebappPath + ) throws AVMNotFoundException; /** * Returns a manifest consisting of just the broken hrefs * within each file containing one or more broken href. - * The HrefManifest list is sorted in increasing lexicographic - * order by file name. The hrefs within each HrefManifest + * The HrefManifestEntry list is sorted in increasing lexicographic + * order by file name. The hrefs within each HrefManifestEntry * are also sorted in increasing lexicographic order. */ - public List getHrefManifests( + public List getHrefManifestEntries( String storeNameOrWebappPath, int statusGTE, int statusLTE) throws AVMNotFoundException; - /** - * This function is just a convenience wrapper for calling - * getHrefManifest with statusGTE=400 and statusLTE=599. - *

- * Note: If you want to get the broken links in every file in - * a webapp or store, it's much more efficient to use - * getBrokenHrefManifests instead of this function. - */ - public HrefManifest getBrokenHrefManifest( String path) - throws AVMNotFoundException; - - /** - * Returns a manifest of all the hrefs within the file specified by 'path' - * whose response status is greater than or equal to statusGTE, - * and less than or equal to statusLTE. - *

- * Note: If you want to get a list of manifests of every file in a - * webapp or store, it's much more efficient to use getHrefManifests - * instead of this function. - */ - public HrefManifest getHrefManifest( String path, - int statusGTE, - int statusLTE) throws - AVMNotFoundException; - /** * Fetch all hyperlinks that rely upon the existence of the file specified @@ -203,16 +178,25 @@ public interface LinkValidationService */ public List getHrefsDependentUponFile(String path); + public HrefDifference getHrefDifference( + String srcWebappPath, + String dstWebappPath, + int connectTimeout, + int readTimeout, + int nthreads, + HrefValidationProgress progress + ) throws AVMNotFoundException; - /** - * Don't use yet - does nothing at the moment. - */ - public BrokenHrefConcordanceDifference getBrokenHrefConcordanceDifference( - int srcVersion, - String srcPath, - int dstVersion, - String dstPath, - HrefValidationProgress progress) + + public HrefDifference getHrefDifference( int srcVersion, + String srcWebappPath, + int dstVersion, + String dstWebappPath, + int connectTimeout, + int readTimeout, + int nthreads, + HrefValidationProgress progress) throws AVMNotFoundException; + }