Async/incremental link validation.

Mostly working with the webapp, but the webapp needs 
to check for a null pointer to ensure that some
result is ready.   Other more minor wrinkles exist,
such as capturing the number of files checked and
displaying the version validated; this may be different
from the latest snapshot if the validation gets behind
the checkins for a while.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6136 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jon Cox
2007-07-02 16:31:43 +00:00
parent 6ae3aab451
commit e82a21f590
8 changed files with 23089 additions and 146 deletions

View File

@@ -294,13 +294,10 @@
<!-- LinkValidationService (turned off until it's been tested) --> <!-- LinkValidationService -->
<!--
<bean id="linkValidationServiceBootstrap" <bean id="linkValidationServiceBootstrap"
class="org.alfresco.linkvalidation.LinkValidationServiceBootstrap"> class="org.alfresco.linkvalidation.LinkValidationServiceBootstrap">
</bean> </bean>
-->
<!-- This component checks the interconnection between the metadata, indexes and content --> <!-- This component checks the interconnection between the metadata, indexes and content -->

View File

@@ -17,11 +17,111 @@
</property> </property>
</bean> </bean>
<bean id="hrefBearingRequestPathNameMatcher"
class="org.alfresco.repo.avm.util.HrefBearingRequestPathNameMatcher">
<property name="extensions">
<list>
<!--
See also: http://filext.com/alphalist.php
and: HrefBearingRequestPathNameMatcher
Note: HrefBearingRequestPathNameMatcher does not require
a leading '.' (unlike FileExtensionNameMatcher),
nor does it suffer from performance problems when
the number of extensions is large. However, it
will not tolerate extensions with internal "." chars;
everything up to and including the final "." will
simply be ignored. Matching is not case sensitive.
-->
<value></value> <!-- null extension for dirs -->
<value>ahtml</value>
<value>ahtm</value>
<value>asphtml</value>
<value>asp</value>
<value>axs</value>
<value>bhtml</value>
<value>dci</value>
<value>dht</value>
<value>dochtml</value>
<value>docmhtml</value>
<value>ehtml</value>
<value>ephtml</value>
<value>fhtml</value>
<value>fphtml</value>
<value>hhtml</value>
<value>ht3</value>
<value>htc</value>
<value>htmls</value>
<value>html</value>
<value>htm</value>
<value>ihtml</value>
<value>jcs</value>
<value>jhtml</value>
<value>jhtm</value>
<value>jsp</value>
<value>log</value>
<value>mdhtml</value>
<value>mhtml</value>
<value>mhtm</value>
<value>mht</value>
<value>mml</value>
<value>php2</value>
<value>php3</value>
<value>php4</value>
<value>php5</value>
<value>php6</value>
<value>php7</value>
<value>php8</value>
<value>php9</value>
<value>php</value>
<value>phtml</value>
<value>phtml</value>
<value>phtm</value>
<value>pht</value>
<value>pl</value>
<value>ppthtml</value>
<value>pptm</value>
<value>pt</value>
<value>pubhtml</value>
<value>pubmhtml</value>
<value>rbx</value>
<value>rhtml</value>
<value>rmh</value>
<value>s1h</value>
<value>shtml3</value>
<value>shtml</value>
<value>shtm</value>
<value>sht</value>
<value>ssi</value>
<value>stml</value>
<value>stm</value>
<value>thtml</value>
<value>txt</value>
<value>whtek</value>
<value>xhtml</value>
<value>xhtml</value>
<value>xhtm</value>
<value>xhtm</value>
<value>xht</value>
<value>xlshtml</value>
<value>xlshtm</value>
<value>xlsmhtml</value>
<value>xml</value>
<value>xtml</value>
<value>ybhtm</value>
<!-- Add others here, if you'd like! -->
</list>
</property>
</bean>
<bean id="linkValidationService" <bean id="linkValidationService"
class="org.alfresco.linkvalidation.LinkValidationServiceImpl" class="org.alfresco.linkvalidation.LinkValidationServiceImpl"
lazy-init="true"> lazy-init="true">
<property name="attributeService"> <property name="attributeService">
<ref bean="attributeService"/> <ref bean="AttributeService"/>
</property> </property>
<property name="avmRemote"> <property name="avmRemote">
<ref bean="avmRemote"/> <ref bean="avmRemote"/>
@@ -30,10 +130,16 @@
<ref bean="VirtServerRegistry"/> <ref bean="VirtServerRegistry"/>
</property> </property>
<property name="AVMSyncService"> <property name="AVMSyncService">
<ref bean="avmSyncService"/> <ref bean="AVMSyncService"/>
</property> </property>
<property name="excludeMatcher"> <property name="excludeMatcher">
<ref bean="linkValidationServiceExcludeExtensionMatcher"/> <ref bean="linkValidationServiceExcludeExtensionMatcher"/>
</property> </property>
<property name="hrefBearingRequestPathMatcher">
<ref bean="hrefBearingRequestPathNameMatcher"/>
</property>
<property name="retryingTransactionHelper">
<ref bean="retryingTransactionHelper"/>
</property>
</bean> </bean>
</beans> </beans>

View File

@@ -747,47 +747,47 @@
</list> </list>
</property> </property>
</bean> </bean>
<!-- The AVMLockingAwareService --> <!-- The AVMLockingAwareService -->
<bean id="avmLockingAwareService" class="org.alfresco.repo.avm.AVMLockingAwareService"> <bean id="avmLockingAwareService" class="org.alfresco.repo.avm.AVMLockingAwareService">
<!-- Because of circular dependendencies, this bean's dependencies <!-- Because of circular dependendencies, this bean's dependencies
are grabbed at bootstrap time.x --> are grabbed at bootstrap time.x -->
</bean> </bean>
<bean id="AVMLockingAwareService" class="org.springframework.aop.framework.ProxyFactoryBean"> <bean id="AVMLockingAwareService" class="org.springframework.aop.framework.ProxyFactoryBean">
<property name="proxyInterfaces"> <property name="proxyInterfaces">
<list> <list>
<value>org.alfresco.service.cmr.avm.AVMService</value> <value>org.alfresco.service.cmr.avm.AVMService</value>
</list> </list>
</property> </property>
<property name="targetName"> <property name="targetName">
<value>avmLockingAwareService</value> <value>avmLockingAwareService</value>
</property> </property>
<property name="interceptorNames"> <property name="interceptorNames">
<list> <list>
<value>avmServiceWriteTxnAdvisor</value> <value>avmServiceWriteTxnAdvisor</value>
<value>avmServiceReadTxnAdvisor</value> <value>avmServiceReadTxnAdvisor</value>
<value>avmSnapShotTriggeredIndexingMethodInterceptor</value> <value>avmSnapShotTriggeredIndexingMethodInterceptor</value>
</list> </list>
</property> </property>
</bean> </bean>
<bean id="indexingAVMLockingAwareService" class="org.springframework.aop.framework.ProxyFactoryBean"> <bean id="indexingAVMLockingAwareService" class="org.springframework.aop.framework.ProxyFactoryBean">
<property name="proxyInterfaces"> <property name="proxyInterfaces">
<list> <list>
<value>org.alfresco.service.cmr.avm.AVMService</value> <value>org.alfresco.service.cmr.avm.AVMService</value>
</list> </list>
</property> </property>
<property name="targetName"> <property name="targetName">
<value>avmLockingAwareService</value> <value>avmLockingAwareService</value>
</property> </property>
<property name="interceptorNames"> <property name="interceptorNames">
<list> <list>
<value>avmSnapShotTriggeredIndexingMethodInterceptor</value> <value>avmSnapShotTriggeredIndexingMethodInterceptor</value>
</list> </list>
</property> </property>
</bean> </bean>
<!-- The AVMService --> <!-- The AVMService -->
@@ -1377,14 +1377,12 @@
</property> </property>
<property name="mappedNames"> <property name="mappedNames">
<list> <list>
<value>getBrokenHrefConcordance</value> <value>getHrefManifestEntries</value>
<value>getHrefConcordance</value> <value>getHrefDifference</value>
<value>getHrefManifestBrokenByDelete</value>
<value>getBrokenHrefManifests</value> <value>getHrefManifestBrokenByNewOrMod</value>
<value>getHrefManifests</value> <value>getHrefConcordanceEntries</value>
<value>getHrefsDependentUponFile</value>
<value>getBrokenHrefManifest</value>
<value>getHrefManifest</value>
</list> </list>
</property> </property>
</bean> </bean>
@@ -1397,7 +1395,7 @@
</property> </property>
<property name="mappedNames"> <property name="mappedNames">
<list> <list>
<value>updateHrefInfo</value> <value>updateHrefInfo</value>
</list> </list>
</property> </property>
</bean> </bean>

View File

@@ -195,11 +195,11 @@ public class LinkValidationAction extends ActionExecuterAbstractBase
} }
else else
{ {
// firstly call updateHrefInfo to scan the whole store for broken links // Not calling linkValidationService.updateHrefInfo explicitly anymore
// NOTE: currently this is NOT done incrementally // so tell the system we're done. Note that the monitor won't have
this.linkValidationService.updateHrefInfo(webappPath, false, this.connectionTimeout, // valid update counts
this.readTimeout, this.noThreads, monitor); monitor.setDone( true );
// retrieve the manifest of all the broken links and files for the webapp // retrieve the manifest of all the broken links and files for the webapp
List<HrefManifestEntry> manifests = this.linkValidationService.getBrokenHrefManifestEntries(webappPath); List<HrefManifestEntry> manifests = this.linkValidationService.getBrokenHrefManifestEntries(webappPath);

View File

@@ -39,12 +39,119 @@ public interface LinkValidationService
public void onBootstrap(); public void onBootstrap();
public void onShutdown(); public void onShutdown();
//-------------------------------------------------------------------------
/** /**
* Updates href status and href file dependencies for path. * This function is just a convenience wrapper for calling
* * getHrefManifestEntries with statusGTE=400 and statusLTE=599.
* <p>
* Note: Files and urls within this list of manifests pertain to
* the latest validated snapshot of staging (which may be
* somewhat older than the very latest snapshot). The
* validation service attempts to keep the latest validated
* snapshot as new as possible, automatically.
*/
//-------------------------------------------------------------------------
public List<HrefManifestEntry> getBrokenHrefManifestEntries(
String storeNameOrWebappPath
) throws AVMNotFoundException;
//-------------------------------------------------------------------------
/**
* Returns a manifest consisting of just the broken hrefs
* within each file containing one or more broken href.
* The HrefManifestEntry list is sorted in increasing lexicographic
* order by file name. The hrefs within each HrefManifestEntry
* are also sorted in increasing lexicographic order.
*/
//-------------------------------------------------------------------------
public List<HrefManifestEntry> getHrefManifestEntries(
String storeNameOrWebappPath,
int statusGTE,
int statusLTE) throws
AVMNotFoundException;
//-------------------------------------------------------------------------
/**
* Fetch the difference between two areas.
* Version -1 is assumed for src; dst relies on the state of the
* link validation service updating link validity tables.
* Typically, this will be for some version close to the latest
* snapshot, but it's async, so it might be older.
*/
//-------------------------------------------------------------------------
public HrefDifference getHrefDifference(
String srcWebappPath,
String dstWebappPath,
int connectTimeout,
int readTimeout,
int nthreads,
HrefValidationProgress progress)
throws AVMNotFoundException,
SocketException,
SSLException,
LinkValidationAbortedException;
//-------------------------------------------------------------------------
/**
* Fetches a manifest of all hyperlinks broken by files
* deleted in a HrefDifference. Files and hrefs in this
* manifest will be in the namespace of the src in the
* HrefDifference. For example, suppose the "test"
* web project had a ROOT webapp with a link within
* "moo.html" that pointed to: "hamlet.html".
* Now suppose that user 'alice' proposes to delete "hamlet.html".
* Because 'alice' is the 'src' and staging is the 'dst'
* in the HrefDifference, all files and hyperlinks appear from
* the perspective of the main working store within
* alice's sandbox. Thus, the broken link info is as follows:
* *
* @param storeNameOrWebappPath * <pre>
* The store name or path to webapp * File containing broken link:
* test--alice:/www/avm_webapps/ROOT/moo.html
*
* Broken link:
* http://alice.test.www--sandbox.version--v-1.127-0-0-1.ip.alfrescodemo.net:8180/hamlet.html
* </pre>
*
* @param hdiff The difference between two webapps obtained
* by calling getHrefDifference().
*/
//-------------------------------------------------------------------------
public HrefManifest getHrefManifestBrokenByDelete(HrefDifference hdiff);
//-------------------------------------------------------------------------
/**
* Fetches a manifest of all hyperlinks broken in new or modified files in
* an HrefDifference. Similar to getHrefManifestBrokenByDelete(),
* the entries in this manifest are in the 'src' namespace of the
* HrefDifference operation (i.e.: files & urls from alice, not staging).
*
* @param hdiff The difference between two webapps obtained
* by calling getHrefDifference().
*/
//-------------------------------------------------------------------------
public HrefManifest getHrefManifestBrokenByNewOrMod(HrefDifference hdiff);
//-------------------------------------------------------------------------
/**
* WARNING: this function won't be part of the public interface for long.
* Updates href status and href file dependencies for path.
*
* @param path
* <ul>
* <li> If null, do all stores & all webapps in them.
* <li> If store, do all webapps in store
* <li> If webapp, do webapp.
* </ul>
* *
* @param incremental * @param incremental
* If true, updates information incrementally, based on the * If true, updates information incrementally, based on the
@@ -53,6 +160,10 @@ public interface LinkValidationService
* info associated with the store/webapp (if any), then does * info associated with the store/webapp (if any), then does
* a full rescan to update info. * a full rescan to update info.
* *
* @param validateExternal
* Currently does nothing. Perhaps one day you'll be able to
* turn off validation of external links.
*
* @param connectTimeout * @param connectTimeout
* Amount of time in milliseconds that this function will wait * Amount of time in milliseconds that this function will wait
* before declaring that the connection has failed * before declaring that the connection has failed
@@ -71,28 +182,51 @@ public interface LinkValidationService
* 'status' may be polled in a separate thread to * 'status' may be polled in a separate thread to
* observe its progress. * observe its progress.
*/ */
public void updateHrefInfo( String storeNameOrWebappPath, //-------------------------------------------------------------------------
boolean incremental, public void updateHrefInfo( String path,
int connectTimeout, boolean incremental,
int readTimeout, boolean validateExternal,
int nthreads, int connectTimeout,
HrefValidationProgress progress int readTimeout,
) int nthreads,
throws AVMNotFoundException, HrefValidationProgress progress)
SocketException, throws AVMNotFoundException,
SSLException, SocketException,
LinkValidationAbortedException; SSLException,
LinkValidationAbortedException;
//-------------------------------------------------------------------------
/**
* Merges an HrefDifference into the master href info table.
* WARNING: This function won't be part of the public interface for long.
*/
//-------------------------------------------------------------------------
public void mergeHrefDiff( HrefDifference hdiff)
throws AVMNotFoundException,
SocketException,
SSLException,
LinkValidationAbortedException;
//-------------------------------------------------------------------------
/** /**
* Fetches information on broken hrefs within a store name or path * Fetches information on broken hrefs within a store name or path
* to a webapp. This function is just a convenience wrapper for calling * to a webapp. This function is just a convenience wrapper for calling
* getHrefConcordance with statusGTE=400 and statusLTE=599. * getHrefConcordance with statusGTE=400 and statusLTE=599.
*/ */
//-------------------------------------------------------------------------
public List<HrefConcordanceEntry> getBrokenHrefConcordanceEntries( public List<HrefConcordanceEntry> getBrokenHrefConcordanceEntries(
String storeNameOrWebappPath String storeNameOrWebappPath
) throws AVMNotFoundException; ) throws AVMNotFoundException;
//-------------------------------------------------------------------------
/** /**
* Returns information regarding the hrefs within storeNameOrWebappPath * Returns information regarding the hrefs within storeNameOrWebappPath
* whose return status is greater than or equal to 'statusGTE', and * whose return status is greater than or equal to 'statusGTE', and
@@ -135,6 +269,7 @@ public interface LinkValidationService
* http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html * http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
* *
*/ */
//-------------------------------------------------------------------------
public List<HrefConcordanceEntry> getHrefConcordanceEntries( public List<HrefConcordanceEntry> getHrefConcordanceEntries(
String storeNameOrWebappPath, String storeNameOrWebappPath,
int statusGTE, int statusGTE,
@@ -142,28 +277,8 @@ public interface LinkValidationService
) throws AVMNotFoundException; ) throws AVMNotFoundException;
/**
* This function is just a convenience wrapper for calling
* getHrefManifestEntries with statusGTE=400 and statusLTE=599.
*/
public List<HrefManifestEntry> getBrokenHrefManifestEntries(
String storeNameOrWebappPath
) throws AVMNotFoundException;
/**
* Returns a manifest consisting of just the broken hrefs
* within each file containing one or more broken href.
* The HrefManifestEntry list is sorted in increasing lexicographic
* order by file name. The hrefs within each HrefManifestEntry
* are also sorted in increasing lexicographic order.
*/
public List<HrefManifestEntry> getHrefManifestEntries(
String storeNameOrWebappPath,
int statusGTE,
int statusLTE) throws
AVMNotFoundException;
//-------------------------------------------------------------------------
/** /**
* Fetch all hyperlinks that rely upon the existence of the file specified * Fetch all hyperlinks that rely upon the existence of the file specified
* by 'path', directly or indirectly. The list of hrefs returned is * by 'path', directly or indirectly. The list of hrefs returned is
@@ -183,57 +298,28 @@ public interface LinkValidationService
* *
* *
*/ */
//-------------------------------------------------------------------------
public List<String> getHrefsDependentUponFile(String path); public List<String> getHrefsDependentUponFile(String path);
public HrefDifference getHrefDifference(
String srcWebappPath,
String dstWebappPath,
int connectTimeout,
int readTimeout,
int nthreads,
HrefValidationProgress progress)
throws AVMNotFoundException,
SocketException,
SSLException,
LinkValidationAbortedException;
/**
* Fetches a manifest of all hyperlinks broken by files
* deleted in a HrefDifference between two webapps.
*
* @param hdiff The difference between two webapps obtained by calling getHrefDifference().
*/
public HrefManifest getHrefManifestBrokenByDelete(HrefDifference hdiff);
/**
* Fetches a manifest of all hyperlinks broken in new or modified files in
* an HrefDifference.
*
* @param hdiff The difference between two webapps obtained by calling getHrefDifference().
*/
public HrefManifest getHrefManifestBrokenByNewOrMod(HrefDifference hdiff);
/**
* Merges an HrefDifference into the master href info table. //-------------------------------------------------------------------------
*/ // NEARLY OBSOLETE!
public void mergeHrefDiff( HrefDifference hdiff) // NEARLY OBSOLETE!
throws AVMNotFoundException, // NEARLY OBSOLETE!
SocketException, // NEARLY OBSOLETE!
SSLException, //-------------------------------------------------------------------------
LinkValidationAbortedException; public void updateHrefInfo(
String storeNameOrWebappPath, // NEARLY OBSOLETE!
/** boolean incremental, // NEARLY OBSOLETE!
* int connectTimeout, // NEARLY OBSOLETE!
*/ int readTimeout, // NEARLY OBSOLETE!
public void updateHrefInfo( String webappPath, int nthreads, // NEARLY OBSOLETE!
boolean incremental, HrefValidationProgress progress) // NEARLY OBSOLETE!
boolean validateExternal, throws AVMNotFoundException, // NEARLY OBSOLETE!
int connectTimeout, SocketException, // NEARLY OBSOLETE!
int readTimeout, SSLException, // NEARLY OBSOLETE!
int nthreads, LinkValidationAbortedException; // NEARLY OBSOLETE!
HrefValidationProgress progress)
throws AVMNotFoundException,
SocketException,
SSLException,
LinkValidationAbortedException;
} }

View File

@@ -259,7 +259,7 @@ public class AVMRemoteLocal implements AVMRemote
*/ */
public int getLatestSnapshotID(String storeName) public int getLatestSnapshotID(String storeName)
{ {
return 0; return fService.getLatestSnapshotID(storeName);
} }
/* (non-Javadoc) /* (non-Javadoc)

File diff suppressed because it is too large Load Diff

View File

@@ -602,6 +602,10 @@ public interface AVMService
/** /**
* Get the latest snapshot ID of a store. * Get the latest snapshot ID of a store.
* Note: All stores have at least one snapshot ID: 0;
* this is the "empty" snapshot taken when
* the store is first created.
*
* @param storeName The store name. * @param storeName The store name.
* @return The ID of the latest extant version of the store. * @return The ID of the latest extant version of the store.
* @throws AVMNotFoundException * @throws AVMNotFoundException
@@ -611,6 +615,17 @@ public interface AVMService
/** /**
* Snapshot the given AVMStore. * Snapshot the given AVMStore.
* When files have been modified since the previous snapshot,
* a new snapshot version is created; otherwise, no extra
* snapshot is actually taken.
* <p>
* When no snapshot is actually taken, but either 'tag'
* or 'description' are non-null, they will override the value for
* the last snapshot (i.e.: the old values will be discarded);
* however, if both 'tag' and 'description' are null then
* invoking createSnapshot when no files have been modified
* becomes a true no-op.
*
* @param store The name of the AVMStore to snapshot. * @param store The name of the AVMStore to snapshot.
* @param tag The short description. * @param tag The short description.
* @param description The thick description. * @param description The thick description.