[SEARCH-1744] DocRouter improvements, refactoring + 95% unit test coverage

This commit is contained in:
agazzarini
2019-07-23 16:18:14 +02:00
parent 10d52cdb80
commit c38b7e3df7
26 changed files with 1992 additions and 263 deletions

View File

@@ -18,31 +18,45 @@
*/ */
package org.alfresco.solr.tracker; package org.alfresco.solr.tracker;
/*
* @author Joel
*/
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
/**
* Nodes and access control lists are grouped by their ACL ID.
* This places the nodes together with all the access control information required to determine the access to a node in the same shard.
* Both the nodes and access control information are sharded. The overall index size will be smaller than other methods as the ACL index information is not duplicated in every shard.
* Also, the ACL count is usually much smaller than the node count.
*
* This method is beneficial if you have lots of ACLs and the documents are evenly distributed over those ACLs.
* For example, if you have many Share sites, nodes and ACLs are assigned to shards randomly based on the ACL and the documents to which it applies.
*
* The node distribution may be uneven as it depends how many nodes share ACLs.
* To use this method, when creating a shard add a new configuration property:
*
* <ul>
* <li>shard.method=MOD_ACL_ID</li>
* <li>shard.instance=&lt;shard.instance></li>
* <li>shard.count=&lt;shard.count></li>
* </ul>
*
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/
public class ACLIDModRouter implements DocRouter public class ACLIDModRouter implements DocRouter
{ {
@Override @Override
public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
if(shardCount <= 1) { {
return true; return shardCount <= 1 || route(acl.getId(), shardCount, shardInstance);
}
return acl.getId() % shardCount == shardInstance;
} }
@Override @Override
public boolean routeNode(int shardCount, int shardInstance, Node node) { public Boolean routeNode(int shardCount, int shardInstance, Node node)
if(shardCount <= 1) { {
return true; return (shardCount <= 1) || route(node.getAclId() , shardCount, shardInstance);
} }
//Route the node based on the mod of the aclId private boolean route(long id, int shardCount, int shardInstance)
return node.getAclId() % shardCount == shardInstance; {
return id % shardCount == shardInstance;
} }
} }

View File

@@ -22,31 +22,35 @@ import org.apache.solr.common.util.Hash;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
/**
/* * Nodes are evenly distributed over the shards at random based on the murmur hash of the ACL ID.
* @author Joel * To use this method, when creating a shard add a new configuration property:
*
* <ul>
* <li>shard.method=ACL_ID</li>
* <li>shard.instance=&lt;shard.instance></li>
* <li>shard.count=&lt;shard.count></li>
* </ul>
*
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/ */
public class ACLIDMurmurRouter implements DocRouter public class ACLIDMurmurRouter implements DocRouter
{ {
@Override @Override
public boolean routeAcl(int numShards, int shardInstance, Acl acl) { public Boolean routeAcl(int numShards, int shardInstance, Acl acl)
if(numShards <= 1) { {
return true; return (numShards <= 1) || route(acl.getId(), numShards, shardInstance);
}
String s = Long.toString(acl.getId());
return (Math.abs(Hash.murmurhash3_x86_32(s, 0, s.length(), 77)) % numShards) == shardInstance;
} }
@Override @Override
public boolean routeNode(int numShards, int shardInstance, Node node) { public Boolean routeNode(int numShards, int shardInstance, Node node)
if(numShards <= 1) { {
return true; return (numShards <= 1) || route(node.getAclId(), numShards, shardInstance);
} }
//Route the node based on the murmur hash of the aclId private boolean route(long id, int numShards, int shardInstance)
String s = Long.toString(node.getAclId()); {
return (Math.abs(Hash.murmurhash3_x86_32(s, 0, s.length(), 77)) % numShards) == shardInstance; String value = Long.toString(id);
return (Math.abs(Hash.murmurhash3_x86_32(value, 0, value.length(), 77)) % numShards) == shardInstance;
} }
} }

View File

@@ -0,0 +1,69 @@
package org.alfresco.solr.tracker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Base class for {@link DocRouter} implementations that can be used either standalone or nested in a
 * primary-fallback composite routing strategy (see {@link DocRouterWithFallback}).
 *
 * The reason for this supertype is that the "negative" outcome of a routing request differs depending on how
 * the router is used:
 *
 * <ul>
 *     <li>Standalone or leaf in a primary-fallback chain: the negative outcome is false (node not accepted)</li>
 *     <li>
 *          Primary routing strategy in a composite primary-fallback chain: the negative outcome is null,
 *          so that the {@link DocRouterWithFallback} can hand the request over to the fallback strategy.
 *     </li>
 * </ul>
 *
 * @author agazzarini
 */
public abstract class ComposableDocRouter implements DocRouter
{
    protected final Logger logger = LoggerFactory.getLogger(getClass());

    /** True when this router is used standalone or as a leaf of a chain, false when it acts as a primary strategy. */
    private final boolean standaloneOrLeaf;

    /**
     * Builds a doc router instance which runs in standalone (or leaf) mode.
     */
    ComposableDocRouter()
    {
        this(true);
    }

    /**
     * Builds a doc router instance with the given mode (standalone or not).
     *
     * @param standaloneOrLeafMode a flag indicating the active mode of this router.
     */
    ComposableDocRouter(boolean standaloneOrLeafMode)
    {
        this.standaloneOrLeaf = standaloneOrLeafMode;
    }

    /**
     * Returns the value a subclass should use when it cannot accept (or cannot evaluate) a routing request.
     *
     * <ul>
     *     <li>Standalone or leaf mode: false</li>
     *     <li>Any other mode (i.e. primary strategy in a primary-fallback chain): null</li>
     * </ul>
     *
     * @return {@link Boolean#FALSE} in standalone/leaf mode, null otherwise.
     */
    Boolean negativeReturnValue()
    {
        if (standaloneOrLeaf)
        {
            return Boolean.FALSE;
        }
        return null;
    }

    /**
     * Logs the given (parameterized) message at debug level, only if the debug level is enabled.
     *
     * @param message the message, which may contain "{}" placeholders.
     * @param params the values for the message placeholders.
     */
    protected void debug(String message, Object ... params)
    {
        if (!logger.isDebugEnabled())
        {
            return;
        }

        logger.debug(message, params);
    }
}

View File

@@ -18,18 +18,32 @@
*/ */
package org.alfresco.solr.tracker; package org.alfresco.solr.tracker;
import org.apache.solr.common.util.Hash;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
/**
/* * This routes documents within specific DBID ranges to specific shards.
* @author Joel * It adds new shards to the cluster without requiring a reindex.
* The access control information is duplicated in each shard.
* DBID range sharding is the only option to offer auto-scaling as opposed to defining your exact shard count at the start.
* All the other sharding methods require repartitioning in some way.
*
* For each shard, you specify the range of DBIDs to be included. As your repository grows you can add shards.
*
* To use this method, when creating a shard add a new configuration property:
*
* <ul>
* <li>shard.method=DB_ID_RANGE</li>
* <li>shard.range=0-20000000</li>
* <li>shard.instance=&lt;shard.instance></li>
* </ul>
*
* @author joel
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/ */
public class DBIDRangeRouter implements DocRouter public class DBIDRangeRouter implements DocRouter
{ {
private long startRange; private long startRange;
@@ -37,52 +51,57 @@ public class DBIDRangeRouter implements DocRouter
private AtomicBoolean expanded = new AtomicBoolean(false); private AtomicBoolean expanded = new AtomicBoolean(false);
private AtomicBoolean initialized = new AtomicBoolean(false); private AtomicBoolean initialized = new AtomicBoolean(false);
public DBIDRangeRouter(long startRange, long endRange) { public DBIDRangeRouter(long startRange, long endRange)
{
this.startRange = startRange; this.startRange = startRange;
this.expandableRange = new AtomicLong(endRange); this.expandableRange = new AtomicLong(endRange);
} }
public void setEndRange(long endRange) { public void setEndRange(long endRange)
{
expandableRange.set(endRange); expandableRange.set(endRange);
} }
public void setExpanded(boolean expanded) { public void setExpanded(boolean expanded)
{
this.expanded.set(expanded); this.expanded.set(expanded);
} }
public void setInitialized(boolean initialized) { public void setInitialized(boolean initialized)
{
this.initialized.set(initialized); this.initialized.set(initialized);
} }
public boolean getInitialized() { public boolean getInitialized()
{
return this.initialized.get(); return this.initialized.get();
} }
public long getEndRange() { public long getEndRange()
{
return expandableRange.longValue(); return expandableRange.longValue();
} }
public long getStartRange() { public long getStartRange()
{
return this.startRange; return this.startRange;
} }
public boolean getExpanded() { public boolean getExpanded()
{
return this.expanded.get(); return this.expanded.get();
} }
@Override @Override
public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
//When routing by DBID range, all acls go to all shards. {
return true; return true;
} }
@Override @Override
public boolean routeNode(int shardCount, int shardInstance, Node node) { public Boolean routeNode(int shardCount, int shardInstance, Node node)
{
long dbid = node.getId(); long dbid = node.getId();
if(dbid >= startRange && dbid < expandableRange.longValue()) { return dbid >= startRange && dbid < expandableRange.longValue();
return true;
} else {
return false;
}
} }
} }

View File

@@ -22,29 +22,42 @@ import org.apache.solr.common.util.Hash;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
/**
/* * DBID murmur hash based document router.
* This method is available in Alfresco Search Services 1.0 and later versions and is the default sharding option in Solr 6.
* Nodes are evenly distributed over the shards at random based on the murmur hash of the DBID.
* The access control information is duplicated in each shard.
* The distribution of nodes over each shard is very even and shards grow at the same rate.
* Also, this is the fall back method if any other sharding information is unavailable.
*
* To use this method, when creating a shard add a new configuration property:
*
* <ul>
* <li>shard.method=DB_ID</li>
* <li>shard.instance=&lt;shard.instance></li>
* <li>shard.count=&lt;shard.count></li>
* </ul>
*
* @author Joel * @author Joel
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/ */
public class DBIDRouter implements DocRouter public class DBIDRouter implements DocRouter
{ {
@Override @Override
public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
//When routing by DBID, all acls go to all shards. {
return true; return true;
} }
@Override @Override
public boolean routeNode(int shardCount, int shardInstance, Node node) { public Boolean routeNode(int shardCount, int shardInstance, Node node)
{
if(shardCount <= 1) { if(shardCount <= 1)
{
return true; return true;
} }
//Route the node based on nodeId String dbid = Long.toString(node.getId());
return (Math.abs(Hash.murmurhash3_x86_32(dbid, 0, dbid.length(), 77)) % shardCount) == shardInstance;
String s = Long.toString(node.getId());
return (Math.abs(Hash.murmurhash3_x86_32(s, 0, s.length(), 77)) % shardCount) == shardInstance;
} }
} }

View File

@@ -19,19 +19,44 @@
package org.alfresco.solr.tracker; package org.alfresco.solr.tracker;
import org.alfresco.util.ISO8601DateFormat; import org.alfresco.util.ISO8601DateFormat;
import org.apache.solr.common.util.Hash;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
/* /**
* @author Joel * The date-based sharding assigns dates sequentially through shards based on the month.
*/ * For example: If there are 12 shards, each month would be assigned sequentially to each shard, wrapping round and
* starting again for each year.
* The non-random assignment facilitates easier shard management - dropping shards or scaling out replication for some
* date range.
* Typical ageing strategies could be based on the created date or destruction date.
*
* Each shard contains copies of all the ACL information, so this information is replicated in each shard.
* However, if the property is not present on a node, sharding falls back to the {@link DBIDRouter} to randomly distribute
* these nodes.
*
* To use this method, when creating a shard add the new configuration properties:
*
* <ul>
* <li>shard.key=exif:dateTimeOriginal</li>
* <li>shard.method=DATE</li>
* <li>shard.instance=&lt;shard.instance></li>
* <li>shard.count=&lt;shard.count></li>
* </ul>
*
* Months can be grouped together, for example, by quarter. Each quarter of data would be assigned sequentially through the available shards.
*
* <ul>
* <li>shard.date.grouping=3</li>
* </ul>
*
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/
public class DateMonthRouter implements DocRouter public class DateMonthRouter implements DocRouter
{ {
protected final static Logger log = LoggerFactory.getLogger(DateMonthRouter.class); protected final static Logger log = LoggerFactory.getLogger(DateMonthRouter.class);
@@ -43,38 +68,52 @@ public class DateMonthRouter implements DocRouter
* Creates a date month router * Creates a date month router
* @param groupparam - the number of months that should be grouped together on a shard before moving to use the next shard in sequence * @param groupparam - the number of months that should be grouped together on a shard before moving to use the next shard in sequence
*/ */
public DateMonthRouter(String groupparam) { public DateMonthRouter(String groupparam)
try { {
try
{
this.grouping = Integer.parseInt(groupparam); this.grouping = Integer.parseInt(groupparam);
} catch (NumberFormatException e) { }
catch (NumberFormatException e)
{
log.error("shard.date.grouping needs to be a valid integer.", e); log.error("shard.date.grouping needs to be a valid integer.", e);
throw e; throw e;
} }
} }
@Override @Override
public boolean routeAcl(int numShards, int shardInstance, Acl acl) { public Boolean routeAcl(int numShards, int shardInstance, Acl acl)
{
return true; return true;
} }
@Override @Override
public boolean routeNode(int numShards, int shardInstance, Node node) { public Boolean routeNode(int numShards, int shardInstance, Node node)
if(numShards <= 1) { {
if(numShards <= 1)
{
return true; return true;
} }
String ISO8601Date = node.getShardPropertyValue(); String ISO8601Date = node.getShardPropertyValue();
if(ISO8601Date == null) { if(ISO8601Date == null)
{
return dbidRouter.routeNode(numShards, shardInstance, node); return dbidRouter.routeNode(numShards, shardInstance, node);
} }
Date date = ISO8601DateFormat.parse(ISO8601Date); try
GregorianCalendar cal = new GregorianCalendar(); {
cal.setTime(date); Date date = ISO8601DateFormat.parse(ISO8601Date);
int month = cal.get(cal.MONTH); GregorianCalendar cal = new GregorianCalendar();
int year = cal.get(cal.YEAR); cal.setTime(date);
return ((((year * 12) + month)/grouping) % numShards) == shardInstance; int month = cal.get(Calendar.MONTH);
int year = cal.get(Calendar.YEAR);
return ((((year * 12) + month) / grouping) % numShards) == shardInstance;
}
catch (Exception exception)
{
return dbidRouter.routeNode(numShards, shardInstance, node);
}
} }
} }

View File

@@ -19,34 +19,42 @@
package org.alfresco.solr.tracker; package org.alfresco.solr.tracker;
import org.alfresco.util.ISO8601DateFormat; import org.alfresco.util.ISO8601DateFormat;
import org.apache.solr.common.util.Hash;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
/* /**
* @author Joel * This {@link DocRouter} has been deprecated because it is a special case of {@link DateMonthRouter} with a grouping
*/ * parameter equal to 3.
*
* @see DateMonthRouter
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/
@Deprecated
public class DateQuarterRouter implements DocRouter public class DateQuarterRouter implements DocRouter
{ {
public boolean routeAcl(int numShards, int shardInstance, Acl acl) { @Override
public Boolean routeAcl(int numShards, int shardInstance, Acl acl)
{
return true; return true;
} }
public boolean routeNode(int numShards, int shardInstance, Node node) { public Boolean routeNode(int numShards, int shardInstance, Node node)
if(numShards <= 1) { {
if(numShards <= 1)
{
return true; return true;
} }
String ISO8601Date = node.getShardPropertyValue(); String ISO8601Date = node.getShardPropertyValue();
//TODO: we can parse the string to make this more efficient rather then creating a calendar.
Date date = ISO8601DateFormat.parse(ISO8601Date); Date date = ISO8601DateFormat.parse(ISO8601Date);
GregorianCalendar cal = new GregorianCalendar(); GregorianCalendar calendar = new GregorianCalendar();
cal.setTime(date); calendar.setTime(date);
int month = cal.get(cal.MONTH); int month = calendar.get(Calendar.MONTH);
int year = cal.get(cal.YEAR); int year = calendar.get(Calendar.YEAR);
return Math.ceil(((year * 12) + (month+1)) / 3) % numShards == shardInstance; return Math.ceil(((year * 12) + (month+1)) / 3) % numShards == shardInstance;
} }
} }

View File

@@ -18,16 +18,44 @@
*/ */
package org.alfresco.solr.tracker; package org.alfresco.solr.tracker;
import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
/* /**
* This tracks two things: transactions and metadata nodes * Defines the logic used for distributing data across the shards.
* A {@link DocRouter} implementor instance is properly configured on each shard.
* Each time an incoming document D arrives to the shard S, the DocRouter (on the S instance)
* will be used for deciding if D needs to be managed (i.e. indexed) by S.
*
* The {@link DocRouter} contract requires a concrete implementor to provide the logic for
* understanding:
*
* <ul>
* <li>If an incoming ACL belongs to the receiving shard or not</li>
* <li>If an incoming Node belongs to the receiving shard or not</li>
* </ul>
*
* @author Joel * @author Joel
*/ */
public interface DocRouter public interface DocRouter
{ {
public boolean routeAcl(int shardCount, int shardInstance, Acl acl); /**
public boolean routeNode(int shardCount, int shardInstance, Node node); * Checks if the incoming ACL document must be indexed on this shard.
*
* @param shardCount the total shard count.
* @param shardInstance the owning shard instance (i.e. instance number).
* @param acl the ACL.
* @return true if the ACL must be indexed in the shard which owns this {@link DocRouter} instance, false otherwise.
*/
Boolean routeAcl(int shardCount, int shardInstance, Acl acl);
/**
* Checks if the incoming Node must be indexed on this shard.
*
* @param shardCount the total shard count.
* @param shardInstance the owning shard instance (i.e. instance number).
* @param node the {@link Node} instance.
* @return true if the {@link Node} instance must be indexed in the shard which owns this {@link DocRouter} instance, false otherwise.
*/
Boolean routeNode(int shardCount, int shardInstance, Node node);
} }

View File

@@ -24,55 +24,59 @@ import org.slf4j.LoggerFactory;
import java.util.Properties; import java.util.Properties;
/* /**
* Routing strategy Factory.
*
* @author Joel * @author Joel
*/ */
public class DocRouterFactory public class DocRouterFactory
{ {
protected final static Logger log = LoggerFactory.getLogger(DocRouterFactory.class); private final static Logger LOGGER = LoggerFactory.getLogger(DocRouterFactory.class);
public static DocRouter getRouter(Properties properties, ShardMethodEnum method) { public static DocRouter getRouter(Properties properties, ShardMethodEnum method)
{
switch(method) { switch(method)
{
case DB_ID: case DB_ID:
log.info("Sharding via DB_ID"); LOGGER.info("Sharding via DB_ID");
return new DBIDRouter(); return new DBIDRouter();
case DB_ID_RANGE: case DB_ID_RANGE:
//
if(properties.containsKey("shard.range")) if(properties.containsKey("shard.range"))
{ {
log.info("Sharding via DB_ID_RANGE"); LOGGER.info("Sharding via DB_ID_RANGE");
String[] pair =properties.getProperty("shard.range").split("-"); String[] pair =properties.getProperty("shard.range").split("-");
long start = Long.parseLong(pair[0]); long start = Long.parseLong(pair[0]);
long end = Long.parseLong(pair[1]); long end = Long.parseLong(pair[1]);
return new DBIDRangeRouter(start, end); return new DBIDRangeRouter(start, end);
} }
case ACL_ID: case ACL_ID:
log.info("Sharding via ACL_ID"); LOGGER.info("Sharding via ACL_ID");
return new ACLIDMurmurRouter(); return new ACLIDMurmurRouter();
case MOD_ACL_ID: case MOD_ACL_ID:
log.info("Sharding via MOD_ACL_ID"); LOGGER.info("Sharding via MOD_ACL_ID");
return new ACLIDModRouter(); return new ACLIDModRouter();
case DATE: case DATE:
log.info("Sharding via DATE"); LOGGER.info("Sharding via DATE");
return new DateMonthRouter(properties.getProperty("shard.date.grouping", "1")); return new DateMonthRouter(properties.getProperty("shard.date.grouping", "1"));
case PROPERTY: case PROPERTY:
log.info("Sharding via PROPERTY"); LOGGER.info("Sharding via PROPERTY");
return new PropertyRouter(properties.getProperty("shard.regex", "")); return new PropertyRouter(properties.getProperty("shard.regex", ""));
case LAST_REGISTERED_INDEXING_SHARD: case LAST_REGISTERED_INDEXING_SHARD:
log.info("Sharding via LAST_REGISTERED_INDEXING_SHARD"); LOGGER.info("Sharding via LAST_REGISTERED_INDEXING_SHARD");
return new LastRegisteredShardRouter(); return new ExplicitShardIdWithStaticPropertyRouter();
case EXPLICIT_ID_FALLBACK_LRIS: case EXPLICIT_ID_FALLBACK_LRIS:
log.info("Sharding via EXPLICIT_ID_FALLBACK_LRIS"); LOGGER.info("Sharding via EXPLICIT_ID_FALLBACK_LRIS");
return new ExplicitRouter(new LastRegisteredShardRouter()); return new DocRouterWithFallback(
new ExplicitShardIdWithDynamicPropertyRouter(false),
new ExplicitShardIdWithStaticPropertyRouter());
case EXPLICIT_ID: case EXPLICIT_ID:
log.info("Sharding via EXPLICIT_ID"); LOGGER.info("Sharding via EXPLICIT_ID");
return new ExplicitRouter(new DBIDRouter()); return new DocRouterWithFallback(
new ExplicitShardIdWithDynamicPropertyRouter(false),
new DBIDRouter());
default: default:
log.info("Sharding via DB_ID (default)"); LOGGER.warn("WARNING! Unknown/unsupported sharding method ({}). System will fallback to DB_ID", method);
return new DBIDRouter(); return new DBIDRouter();
} }
} }
} }

View File

@@ -0,0 +1,48 @@
package org.alfresco.solr.tracker;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Objects;
import static java.util.Optional.ofNullable;
/**
 * A composite {@link DocRouter} which consists of
 *
 * <ul>
 *     <li>A primary routing strategy</li>
 *     <li>A fallback strategy, consulted for a node when the primary strategy returns null</li>
 * </ul>
 *
 * ACL routing is always delegated to the primary strategy only.
 *
 * @author agazzarini
 */
public class DocRouterWithFallback implements DocRouter
{
    // The logger must be bound to this class, otherwise its output is attributed to another router.
    private final static Logger LOGGER = LoggerFactory.getLogger(DocRouterWithFallback.class);

    private final DocRouter primaryStrategy;
    private final DocRouter fallbackStrategy;

    /**
     * Builds a new composite router with the given strategies.
     *
     * @param primaryStrategy the strategy which is asked first, must not be null.
     * @param fallbackStrategy the strategy used when the primary one returns null, must not be null.
     * @throws NullPointerException if one of the two strategies is null.
     */
    public DocRouterWithFallback(DocRouter primaryStrategy, DocRouter fallbackStrategy)
    {
        this.primaryStrategy = Objects.requireNonNull(primaryStrategy, "primaryStrategy");
        this.fallbackStrategy = Objects.requireNonNull(fallbackStrategy, "fallbackStrategy");
    }

    /**
     * Routes the given ACL using the primary strategy only: the fallback strategy is not involved.
     *
     * @param shardCount the total shard count.
     * @param shardInstance the owning shard instance (i.e. instance number).
     * @param acl the ACL.
     * @return whatever the primary strategy returns for the given ACL.
     */
    @Override
    public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
    {
        return primaryStrategy.routeAcl(shardCount, shardInstance, acl);
    }

    /**
     * Routes the given node using the primary strategy and, if that returns null, the fallback strategy.
     *
     * @param shardCount the total shard count.
     * @param shardInstance the owning shard instance (i.e. instance number).
     * @param node the {@link Node} instance.
     * @return the primary strategy outcome if it is not null, otherwise the fallback strategy outcome;
     *          false if the fallback strategy returns null as well. This method never returns null.
     */
    @Override
    public Boolean routeNode(int shardCount, int shardInstance, Node node)
    {
        Boolean primaryOutcome = primaryStrategy.routeNode(shardCount, shardInstance, node);
        if (primaryOutcome != null)
        {
            return primaryOutcome;
        }

        // A null outcome means the primary strategy could not decide: the fallback strategy takes over.
        if (LOGGER.isDebugEnabled())
        {
            LOGGER.debug("Shard {}: the primary routing strategy returned no outcome for node {}, using the fallback strategy.",
                    shardInstance,
                    node.getNodeRef());
        }

        return ofNullable(fallbackStrategy.routeNode(shardCount, shardInstance, node)).orElse(false);
    }
}

View File

@@ -1,78 +0,0 @@
/*
* Copyright (C) 2005-2019 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Routes a document only if the shardInstance matches the shard id found in the node's shard property value.
 * When that value is missing, empty or not a valid integer, the decision is delegated to the fallback router
 * received at construction time. ACLs are accepted by every shard.
 */
public class ExplicitRouter implements DocRouter
{
    protected final static Logger log = LoggerFactory.getLogger(ExplicitRouter.class);

    private final DocRouter fallbackRouter;

    /**
     * Builds a new router with the given fallback.
     *
     * @param fallbackRouter the router used when no valid explicit shard id is available on a node.
     */
    public ExplicitRouter(DocRouter fallbackRouter)
    {
        this.fallbackRouter = fallbackRouter;
    }

    /**
     * All ACLs go to all shards.
     *
     * @return always true.
     */
    @Override
    public boolean routeAcl(int shardCount, int shardInstance, Acl acl)
    {
        return true;
    }

    /**
     * Accepts the node if its shard property value is an integer equal to the given shard instance.
     * If the value is null, empty or cannot be parsed, the fallback router decides.
     *
     * @param shardCount the total shard count.
     * @param shardInstance the owning shard instance (i.e. instance number).
     * @param node the node to route.
     * @return true if the node must be indexed on this shard, false otherwise.
     */
    @Override
    public boolean routeNode(int shardCount, int shardInstance, Node node)
    {
        final String explicitShard = node.getShardPropertyValue();

        if (explicitShard == null || explicitShard.isEmpty())
        {
            if (log.isDebugEnabled())
            {
                log.debug("Shard "+shardInstance+" EXPLICIT_ID routing specified but no shard id property found for node "+node.getNodeRef());
            }
        }
        else
        {
            try
            {
                return Integer.parseInt(explicitShard) == shardInstance;
            }
            catch (NumberFormatException unparsable)
            {
                // Not a valid shard id: trace the issue and let the fallback router decide.
                if (log.isDebugEnabled())
                {
                    log.debug("Shard "+shardInstance+" EXPLICIT_ID routing specified but failed to parse a shard property value ("+explicitShard+") for node "+node.getNodeRef());
                }
            }
        }

        if (log.isDebugEnabled())
        {
            log.debug("Shard "+shardInstance+" falling back to DBID routing for node "+node.getNodeRef());
        }

        return fallbackRouter.routeNode(shardCount, shardInstance, node);
    }
}

View File

@@ -0,0 +1,75 @@
/*
* Copyright (C) 2005-2019 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
/**
 * Routes a node only if the shardInstance matches the shard id carried by the node itself.
 * The target shard identifier is read from the shard property value of the incoming node.
 * ACLs are accepted by every shard, so the access control information is duplicated in each shard.
 *
 * @author agazzarini
 * @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
 */
public class ExplicitShardIdWithDynamicPropertyRouter extends ComposableDocRouter
{
    /**
     * Builds a new router using the default mode of the superclass.
     */
    public ExplicitShardIdWithDynamicPropertyRouter()
    {
        super();
    }

    /**
     * Builds a new router with the given mode.
     *
     * @param isInStandaloneMode the mode flag, passed as is to the superclass.
     */
    public ExplicitShardIdWithDynamicPropertyRouter(boolean isInStandaloneMode)
    {
        super(isInStandaloneMode);
    }

    /**
     * All ACLs go to all shards.
     *
     * @return always true.
     */
    @Override
    public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
    {
        return true;
    }

    /**
     * Accepts the node if its (trimmed) shard property value is an integer equal to the given shard instance.
     *
     * @param shardCount the total shard count.
     * @param shardInstance the owning shard instance (i.e. instance number).
     * @param node the {@link Node} instance.
     * @return true/false if the shard property value is a valid integer (matching or not the shard instance),
     *          the negative return value of this router if the value is null, blank or not a valid integer.
     */
    @Override
    public Boolean routeNode(int shardCount, int shardInstance, Node node)
    {
        final String rawShardId = node.getShardPropertyValue();
        final String candidate = (rawShardId == null) ? "" : rawShardId.trim();

        if (candidate.isEmpty())
        {
            debug("Shard {}: EXPLICIT_ID routing specified but no shard id property found for node {}", shardInstance, node.getNodeRef());
            return negativeReturnValue();
        }

        try
        {
            return Integer.parseInt(candidate) == shardInstance;
        }
        catch (NumberFormatException exception)
        {
            // The raw (untrimmed) value is reported, so the message shows exactly what was found on the node.
            debug("Shard {} EXPLICIT_ID routing specified but failed to parse a shard property value ({}) for node {}",
                    shardInstance,
                    rawShardId,
                    node.getNodeRef());
            return negativeReturnValue();
        }
    }
}

View File

@@ -24,39 +24,43 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/** /**
* Routes a document only if the explicitShardId matches the provided shardId * Routes the incoming nodes (not ACLs!) on the last registered indexing shard (LRIS).
* The access control information is duplicated in each shard.
* *
* @author Elia * @author Elia
* @author agazzarini
*/ */
public class LastRegisteredShardRouter implements DocRouter public class ExplicitShardIdWithStaticPropertyRouter extends ComposableDocRouter
{ {
private final static Logger log = LoggerFactory.getLogger(ExplicitShardIdWithStaticPropertyRouter.class);
protected final static Logger log = LoggerFactory.getLogger(ExplicitRouter.class); public ExplicitShardIdWithStaticPropertyRouter()
public LastRegisteredShardRouter()
{ {
super();
}
public ExplicitShardIdWithStaticPropertyRouter(boolean isInStandaloneMode)
{
super(isInStandaloneMode);
} }
@Override @Override
public boolean routeAcl(int shardCount, int shardInstance, Acl acl) public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
{ {
//all acls go to all shards.
return true; return true;
} }
@Override @Override
public boolean routeNode(int shardCount, int shardInstance, Node node) public Boolean routeNode(int shardCount, int shardInstance, Node node)
{ {
Integer explicitShardId = node.getExplicitShardId(); Integer explicitShardId = node.getExplicitShardId();
if (explicitShardId == null) if (explicitShardId == null)
{ {
log.error("explicitShardId is not set for node " + node.getNodeRef()); debug("ExplicitShardId property is not set for node {} ", node.getNodeRef());
return false; return negativeReturnValue();
} }
return explicitShardId.equals(shardInstance); return explicitShardId.equals(shardInstance);
} }
} }

View File

@@ -30,47 +30,67 @@ import java.util.regex.Pattern;
/** /**
* Routes based on a text property field. * Routes based on a text property field.
* In this method, the value of some property is hashed and this hash is used to assign the node to a random shard.
* All nodes with the same property value will be assigned to the same shard.
* Each shard will duplicate all the ACL information.
*
* To use this method, when creating a shard add the new configuration properties:
*
* <ul>
* <li>shard.key=cm:creator</li>
* <li>shard.method=PROPERTY</li>
* <li>shard.instance=&lt;shard.instance></li>
* <li>shard.count=&lt;shard.count></li>
* </ul>
*
* It is possible to extract a part of the property value to use for sharding using a regular expression,
* for example, a year at the start of a string:
*
* <ul>
* <li>shard.regex=^\d{4}</li>
* </ul>
* *
* @author Gethin James * @author Gethin James
* @see <a href="https://docs.alfresco.com/search-enterprise/concepts/solr-shard-approaches.html">Search Services sharding methods</a>
*/ */
public class PropertyRouter implements DocRouter public class PropertyRouter implements DocRouter
{ {
protected final static Logger log = LoggerFactory.getLogger(PropertyRouter.class); protected final static Logger log = LoggerFactory.getLogger(PropertyRouter.class);
Pattern pattern = null; Pattern pattern;
//Fallback to DB_ID routing //Fallback to DB_ID routing
private DocRouter fallback = DocRouterFactory.getRouter(null, ShardMethodEnum.DB_ID); DocRouter fallback = DocRouterFactory.getRouter(null, ShardMethodEnum.DB_ID);
public PropertyRouter(String propertyRegEx) public PropertyRouter(String propertyRegEx)
{ {
if (propertyRegEx != null && !propertyRegEx.isEmpty()) if (propertyRegEx != null && propertyRegEx.trim().length() > 0)
{ {
pattern = Pattern.compile(propertyRegEx); pattern = Pattern.compile(propertyRegEx.trim());
} }
} }
@Override @Override
public boolean routeAcl(int shardCount, int shardInstance, Acl acl) public Boolean routeAcl(int shardCount, int shardInstance, Acl acl)
{ {
return true; return true;
} }
@Override @Override
public boolean routeNode(int shardCount, int shardInstance, Node node) public Boolean routeNode(int shardCount, int shardInstance, Node node)
{ {
if(shardCount <= 1) if(shardCount <= 1)
{ {
return true; return true;
} }
String shardBy = node.getShardPropertyValue();
String shardBy = node.getShardPropertyValue();
if (shardBy !=null && pattern != null) if (shardBy !=null && pattern != null)
{ {
try try
{ {
Matcher matcher = pattern.matcher(shardBy); Matcher matcher = pattern.matcher(shardBy);
if (matcher.find() && !matcher.group(1).isEmpty()) if (matcher.find() && matcher.groupCount() > 1 && !matcher.group(1).isEmpty())
{ {
shardBy = matcher.group(1); shardBy = matcher.group(1);
} }

View File

@@ -65,6 +65,7 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
@@ -104,14 +105,16 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.update.CommitUpdateCommand;
/** /**
* Alfresco Solr Utility class which provide helper methods. * Alfresco Solr Test Utility class which provide helper methods.
* @author Michael Suzuki
* *
* @author Michael Suzuki
* @author Andrea Gazzarini
*/ */
public class AlfrescoSolrUtils public class AlfrescoSolrUtils
{ {
public static final String TEST_NAMESPACE = "http://www.alfresco.org/test/solrtest"; public static final String TEST_NAMESPACE = "http://www.alfresco.org/test/solrtest";
public static long MAX_WAIT_TIME = 80000; public static long MAX_WAIT_TIME = 80000;
public static Random RANDOMIZER = new Random();
/** /**
* Get transaction. * Get transaction.
@@ -141,6 +144,25 @@ public class AlfrescoSolrUtils
return transaction; return transaction;
} }
/**
 * Produces a pseudo-random shard count which is guaranteed to be at least 2.
 * The value is obtained by adding 2 to the result of {@link #randomPositiveInteger()}.
 *
 * @return a pseudo-random number of shards, always greater than 1.
 */
public static int randomShardCountGreaterThanOne()
{
    final int minimumShardCount = 2;
    return minimumShardCount + randomPositiveInteger();
}
/**
 * Returns a pseudo-random integer between 0 (inclusive) and 100 (exclusive).
 * Despite the method name, the returned value can be zero.
 *
 * @return a pseudo-random integer in the range [0, 100).
 */
public static int randomPositiveInteger()
{
return RANDOMIZER.nextInt(100);
}
/** /**
* Get a node. * Get a node.

View File

@@ -0,0 +1,131 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Unit tests for the {@link DocRouter} that {@link DocRouterFactory} creates for the
 * {@link ShardMethodEnum#ACL_ID} shard method.
 */
@RunWith(MockitoJUnitRunner.class)
public class ACLIDMurmurRouterTest
{
    private DocRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    @Before
    public void setUp()
    {
        router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.ACL_ID);
    }

    @Test
    public void negativeShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int negativeShardCount = -14;

        assertTrue(router.routeAcl(negativeShardCount, 1, acl));
        assertTrue(router.routeNode(negativeShardCount, 1, node));
    }

    @Test
    public void zeroShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int zeroShardCount = 0;

        assertTrue(router.routeAcl(zeroShardCount, 1, acl));
        assertTrue(router.routeNode(zeroShardCount, 1, node));
    }

    @Test
    public void oneShardInTheCluster_shouldAlwaysReturnTrue()
    {
        // A cluster composed by a single shard: that shard must accept every ACL and every node.
        int oneShardCount = 1;
        int theOnlyShardInstance = 0;

        assertTrue(router.routeAcl(oneShardCount, theOnlyShardInstance, acl));
        assertTrue(router.routeNode(oneShardCount, theOnlyShardInstance, node));
    }

    @Test
    public void sevenShardsInTheCluster_shouldBalanceNodesAndAcls()
    {
        int[] shardIdentifiers = range(0, 7).toArray();
        int shardCount = shardIdentifiers.length;
        int howManyDocumentsPerShard = 1000;

        // Maps (shard identifier -> number of accepted documents) used for validating the data distribution
        Map<Integer, Integer> aclDistributionMap = new HashMap<>();
        Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

        range(0, shardCount * howManyDocumentsPerShard)
            .mapToLong(Long::valueOf)
            .forEach(id -> {
                // Real instances are used here (the class-level mocks are left untouched):
                // the node refers to the ACL through its identifier.
                Acl currentAcl = new Acl(randomPositiveInteger(), id);
                Node currentNode = new Node();
                currentNode.setAclId(currentAcl.getId());

                // Each document is offered to every shard; the maps count how many documents each shard accepts.
                stream(shardIdentifiers)
                    .forEach(shardId -> {
                        if (router.routeAcl(shardCount, shardId, currentAcl))
                        {
                            aclDistributionMap.merge(shardId, 1, Integer::sum);
                        }

                        if (router.routeNode(shardCount, shardId, currentNode))
                        {
                            nodeDistributionMap.merge(shardId, 1, Integer::sum);
                        }
                    });
            });

        StandardDeviation sd = new StandardDeviation();
        double aclsDeviation = sd.evaluate(aclDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray());
        double nodesDeviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray());

        // Every shard must have received at least one node and one ACL.
        assertEquals(shardIdentifiers.length, nodeDistributionMap.size());
        assertEquals(shardIdentifiers.length, aclDistributionMap.size());

        // Asserts that the standard deviation, expressed as a percentage of the expected
        // number of documents per shard, is lower than 30%
        assertTrue(aclDistributionMap.values().toString() + ", SD = " + aclsDeviation, aclsDeviation/(howManyDocumentsPerShard) * 100 < 30);
        assertTrue(nodeDistributionMap.values().toString() + ", SD = " + nodesDeviation,nodesDeviation/(howManyDocumentsPerShard) * 100 < 30);
    }
}

View File

@@ -0,0 +1,125 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.when;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Unit tests for the {@link DocRouter} that {@link DocRouterFactory} creates for the
 * {@link ShardMethodEnum#MOD_ACL_ID} shard method.
 */
@RunWith(MockitoJUnitRunner.class)
public class AclModCountRouterTest
{
    private DocRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    @Before
    public void setUp()
    {
        router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.MOD_ACL_ID);
    }

    @Test
    public void negativeShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int negativeShardCount = -14;

        assertTrue(router.routeAcl(negativeShardCount, 1, acl));
        assertTrue(router.routeNode(negativeShardCount, 1, node));
    }

    @Test
    public void zeroShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int zeroShardCount = 0;

        assertTrue(router.routeAcl(zeroShardCount, 1, acl));
        assertTrue(router.routeNode(zeroShardCount, 1, node));
    }

    @Test
    public void oneShardInTheCluster_shouldAlwaysReturnTrue()
    {
        // A cluster composed by a single shard: that shard must accept every ACL and every node.
        int oneShardCount = 1;
        int theOnlyShardInstance = 0;

        assertTrue(router.routeAcl(oneShardCount, theOnlyShardInstance, acl));
        assertTrue(router.routeNode(oneShardCount, theOnlyShardInstance, node));
    }

    @Test
    public void sevenShardsInTheCluster_shouldBalanceNodesAndAcls()
    {
        int[] shardIdentifiers = range(0, 7).toArray();
        int shardCount = shardIdentifiers.length;
        int howManyDocumentsPerShard = 100;

        // Maps (shard identifier -> number of accepted documents) used for validating the data distribution
        Map<Integer, Integer> aclDistributionMap = new HashMap<>();
        Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

        range(0, shardCount * howManyDocumentsPerShard)
            .mapToLong(Long::valueOf)
            .forEach(id -> {
                // The same identifier is used for the ACL and for the ACL referenced by the node.
                when(acl.getId()).thenReturn(id);
                when(node.getAclId()).thenReturn(id);

                // Each document is offered to every shard; the maps count how many documents each shard accepts.
                stream(shardIdentifiers)
                    .forEach(shardId -> {
                        if (router.routeAcl(shardCount, shardId, acl))
                        {
                            aclDistributionMap.merge(shardId, 1, Integer::sum);
                        }

                        if (router.routeNode(shardCount, shardId, node))
                        {
                            nodeDistributionMap.merge(shardId, 1, Integer::sum);
                        }
                    });

                // Clears the stubbing so the next iteration can install the next identifier.
                reset(acl, node);
            });

        // Consecutive identifiers are expected to be spread evenly: exactly the same amount on each shard.
        assertEquals(shardIdentifiers.length, aclDistributionMap.size());
        aclDistributionMap.forEach((k, v) -> assertEquals(howManyDocumentsPerShard, v.intValue()));

        assertEquals(shardIdentifiers.length, nodeDistributionMap.size());
        nodeDistributionMap.forEach((k, v) -> assertEquals(howManyDocumentsPerShard, v.intValue()));
    }
}

View File

@@ -0,0 +1,88 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.Properties;
import java.util.Random;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
/**
 * Test case for the {@link DBIDRangeRouter}, here configured with the "200-20000" shard range.
 *
 * @author agazzarini
 */
@RunWith(MockitoJUnitRunner.class)
public class DBIDRangeRouterTest
{
    private final Random randomizer = new Random();
    private DocRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    @Before
    public void setUp()
    {
        Properties configuration = new Properties();
        configuration.put("shard.range", "200-20000");

        router = DocRouterFactory.getRouter(configuration, ShardMethodEnum.DB_ID_RANGE);
    }

    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        // Whatever the (random) shard count and shard instance are, the ACL must be accepted.
        for (int attempt = 0; attempt < 100; attempt++)
        {
            int shardCount = randomizer.nextInt();
            int shardInstance = randomizer.nextInt();

            assertTrue(router.routeAcl(shardCount, shardInstance, acl));
        }
    }

    @Test
    public void outOfBoundsShouldRejectTheNode()
    {
        // Just below the lower bound of the configured range
        assertFalse(isAcceptedNodeWithId(199L));

        // The upper bound of the configured range is expected to be rejected as well
        assertFalse(isAcceptedNodeWithId(20000L));
    }

    @Test
    public void inRange_shouldAcceptTheNode()
    {
        // The lower bound of the configured range is expected to be accepted
        assertTrue(isAcceptedNodeWithId(200L));

        assertTrue(isAcceptedNodeWithId(543L));
    }

    /**
     * Stubs the mocked node with the given identifier and routes it using a random shard count and shard instance.
     *
     * @param nodeId the identifier that the mocked node will return.
     * @return the routing outcome for the node.
     */
    private boolean isAcceptedNodeWithId(long nodeId)
    {
        when(node.getId()).thenReturn(nodeId);

        int shardCount = randomizer.nextInt();
        int shardInstance = randomizer.nextInt();

        return router.routeNode(shardCount, shardInstance, node);
    }
}

View File

@@ -0,0 +1,131 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.when;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Unit tests for the {@link DocRouter} that {@link DocRouterFactory} creates for the
 * {@link ShardMethodEnum#DB_ID} shard method.
 */
@RunWith(MockitoJUnitRunner.class)
public class DBIDRouterTest
{
    private DocRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    @Before
    public void setUp()
    {
        router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.DB_ID);
    }

    @Test
    public void negativeShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int negativeShardCount = -14;

        assertTrue(router.routeAcl(negativeShardCount, 1, acl));
        assertTrue(router.routeNode(negativeShardCount, 1, node));
    }

    @Test
    public void zeroShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int zeroShardCount = 0;

        assertTrue(router.routeAcl(zeroShardCount, 1, acl));
        assertTrue(router.routeNode(zeroShardCount, 1, node));
    }

    @Test
    public void oneShardInTheCluster_shouldAlwaysReturnTrue()
    {
        // A cluster composed by a single shard: that shard must accept every ACL and every node.
        int oneShardCount = 1;
        int theOnlyShardInstance = 0;

        assertTrue(router.routeAcl(oneShardCount, theOnlyShardInstance, acl));
        assertTrue(router.routeNode(oneShardCount, theOnlyShardInstance, node));
    }

    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        // Whatever the (random) shard count and shard instance are, the ACL must be accepted.
        range(0, 100).forEach(index -> assertTrue(router.routeAcl(randomShardCountGreaterThanOne(), randomPositiveInteger(), acl)));
    }

    @Test
    public void multipleShardsInTheCluster_shouldBalanceNodes()
    {
        int[] shardIdentifiers = range(0, 15).toArray();
        int shardCount = shardIdentifiers.length;
        int howManyDocumentsPerShard = 10000;

        // Map (shard identifier -> number of accepted nodes) used for validating the data distribution
        Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

        range(0, shardCount * howManyDocumentsPerShard)
            .mapToLong(Long::valueOf)
            .forEach(id -> {
                // A real instance is used here (the class-level mock is left untouched).
                Node currentNode = new Node();
                currentNode.setId(id);

                // Each node is offered to every shard; the map counts how many nodes each shard accepts.
                stream(shardIdentifiers)
                    .forEach(shardId -> {
                        if (router.routeNode(shardCount, shardId, currentNode))
                        {
                            nodeDistributionMap.merge(shardId, 1, Integer::sum);
                        }
                    });
            });

        StandardDeviation sd = new StandardDeviation();
        double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray());
        double norm = deviation/(howManyDocumentsPerShard) * 100;

        // Every shard must have received at least one node.
        assertEquals(shardIdentifiers.length, nodeDistributionMap.size());

        // Asserts that the standard deviation, expressed as a percentage of the expected
        // number of documents per shard, is lower than 30%
        assertTrue(
                nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%",
                norm < 30);
    }
}

View File

@@ -0,0 +1,143 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.alfresco.util.ISO8601DateFormat;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
/**
 * Unit tests for the {@link DocRouter} that {@link DocRouterFactory} creates for the
 * {@link ShardMethodEnum#DATE} shard method (the tests cast it to {@link DateMonthRouter}).
 */
@RunWith(MockitoJUnitRunner.class)
public class DateMonthRouterTest
{
    private Random randomizer = new Random();
    private DocRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    @Before
    public void setUp()
    {
        router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.DATE);
    }

    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        // Whatever the (random) shard count and shard instance are, the ACL must be accepted.
        for (int attempt = 0; attempt < 100; attempt++)
        {
            int shardCount = randomizer.nextInt();
            int shardInstance = randomizer.nextInt();

            assertTrue(router.routeAcl(shardCount, shardInstance, acl));
        }
    }

    @Test
    public void invalidDate_shouldFallBackToDBIDRouting()
    {
        assertRoutingIsDelegatedToFallbackRouter("Something which is not an ISO Date");
    }

    @Test
    public void nullDate_shouldFallBackToDBIDRouting()
    {
        assertRoutingIsDelegatedToFallbackRouter(null);
    }

    @Test
    public void twelveShardsInTheCluster_shouldBalanceNodes()
    {
        int[] shardIdentifiers = range(0, 12).toArray();
        int shardCount = shardIdentifiers.length;

        // NOTE(review): this call uses the class-level mock and its result is ignored;
        // confirm whether it is still needed.
        router.routeNode(shardCount, 0, node);

        int howManyDocuments = shardCount * 10000;
        long thirtyDaysInMillis = 1000L * 60 * 60 * 24 * 30;

        // Map (shard identifier -> number of accepted nodes) used for validating the data distribution
        Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

        for (long id = 0; id < howManyDocuments; id++)
        {
            // Each node gets a date which is 30 days after the date of the previous node.
            Date shardingDate = new Date(System.currentTimeMillis() + id * thirtyDaysInMillis);

            Node datedNode = new Node();
            datedNode.setShardPropertyValue(ISO8601DateFormat.format(shardingDate));

            // The node is offered to every shard; the map counts how many nodes each shard accepts.
            for (int shardId : shardIdentifiers)
            {
                if (router.routeNode(shardCount, shardId, datedNode))
                {
                    nodeDistributionMap.merge(shardId, 1, Integer::sum);
                }
            }
        }

        double[] acceptedNodesPerShard =
                nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray();
        double deviation = new StandardDeviation().evaluate(acceptedNodesPerShard);

        // Every shard must have received at least one node.
        assertEquals(shardIdentifiers.length, nodeDistributionMap.size());

        // Asserts that the standard deviation, expressed as a percentage of the expected
        // number of documents per shard, is lower than 30%
        assertTrue(
                nodeDistributionMap.values().toString() + ", SD = " + deviation,
                deviation/(howManyDocuments/shardCount) * 100 < 30);
    }

    /**
     * Replaces the fallback router with a mock, routes the mocked node carrying the given shard property value
     * and verifies that the routing request has been forwarded, with the same arguments, to the fallback router.
     *
     * @param shardPropertyValue the value the mocked node returns as shard property.
     */
    private void assertRoutingIsDelegatedToFallbackRouter(String shardPropertyValue)
    {
        DBIDRouter fallbackRouting = mock(DBIDRouter.class);
        ((DateMonthRouter)router).dbidRouter = fallbackRouting;

        when(node.getShardPropertyValue()).thenReturn(shardPropertyValue);

        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();

        router.routeNode(shardCount, shardInstance, node);

        verify(fallbackRouting).routeNode(shardCount, shardInstance, node);
    }
}

View File

@@ -21,7 +21,6 @@ package org.alfresco.solr.tracker;
import java.util.Properties; import java.util.Properties;
import org.alfresco.model.ContentModel; import org.alfresco.model.ContentModel;
import org.alfresco.repo.index.shard.ShardMethodEnum; import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
import org.alfresco.solr.AbstractAlfrescoDistributedTest; import org.alfresco.solr.AbstractAlfrescoDistributedTest;
import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.AclChangeSet; import org.alfresco.solr.client.AclChangeSet;
@@ -30,12 +29,6 @@ import org.alfresco.solr.client.Node;
import org.alfresco.solr.client.NodeMetaData; import org.alfresco.solr.client.NodeMetaData;
import org.alfresco.solr.client.StringPropertyValue; import org.alfresco.solr.client.StringPropertyValue;
import org.alfresco.solr.client.Transaction; import org.alfresco.solr.client.Transaction;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.junit.AfterClass; import org.junit.AfterClass;
@@ -60,7 +53,7 @@ import static org.carrot2.shaded.guava.common.collect.ImmutableList.of;
@SolrTestCaseJ4.SuppressSSL @SolrTestCaseJ4.SuppressSSL
@SolrTestCaseJ4.SuppressObjectReleaseTracker (bugUrl = "RAMDirectory") @SolrTestCaseJ4.SuppressObjectReleaseTracker (bugUrl = "RAMDirectory")
@LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", "Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"}) @LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", "Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"})
public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDistributedTest public class DistributedExplicitShardIdWithStaticPropertyRouterTest extends AbstractAlfrescoDistributedTest
{ {
private static long MAX_WAIT_TIME = 80000; private static long MAX_WAIT_TIME = 80000;
private final int timeout = 100000; private final int timeout = 100000;
@@ -68,7 +61,7 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi
@Before @Before
private void initData() throws Throwable private void initData() throws Throwable
{ {
initSolrServers(2, "DistributedLastRegisteredShardRoutingTest", getProperties()); initSolrServers(2, getClass().getSimpleName(), getProperties());
indexData(); indexData();
} }
@@ -78,32 +71,6 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi
dismissSolrServers(); dismissSolrServers();
} }
/**
* Setup, indexes and returns the ACL used within the tests.
*
* @return the ACL used within the test.
*/
private Acl getTestAcl() throws Exception
{
AclChangeSet aclChangeSet = getAclChangeSet(1);
Acl acl = getAcl(aclChangeSet);
AclReaders aclReaders = getAclReaders(aclChangeSet, acl, singletonList("joel"), singletonList("phil"), null);
indexAclChangeSet(aclChangeSet, singletonList(acl), singletonList(aclReaders));
//Check for the ACL state stamp.
BooleanQuery.Builder builder =
new BooleanQuery.Builder()
.add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_SOLR4_ID, "TRACKER!STATE!ACLTX")), BooleanClause.Occur.MUST))
.add(new BooleanClause(LegacyNumericRangeQuery.newLongRange(
QueryConstants.FIELD_S_ACLTXID, aclChangeSet.getId(), aclChangeSet.getId() + 1, true, false), BooleanClause.Occur.MUST));
Query waitForQuery = builder.build();
waitForDocCount(waitForQuery, 1, MAX_WAIT_TIME);
return acl;
}
/** /**
* Default data is indexed in solr. * Default data is indexed in solr.
* 1 folder node with 2 children nodes. * 1 folder node with 2 children nodes.
@@ -157,7 +124,6 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi
Node node4 = getNode(3, txn, acl, Node.SolrApiNodeStatus.UPDATED); Node node4 = getNode(3, txn, acl, Node.SolrApiNodeStatus.UPDATED);
node4.setExplicitShardId(1); node4.setExplicitShardId(1);
NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "elia", null, false); NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "elia", null, false);
nodeMetaData4.getProperties().put(ContentModel.PROP_NAME, new StringPropertyValue("second"));
nodeMetaData4.getProperties().put(ContentModel.PROP_NAME, new StringPropertyValue("forth")); nodeMetaData4.getProperties().put(ContentModel.PROP_NAME, new StringPropertyValue("forth"));
/* /*
@@ -199,4 +165,4 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi
prop.put("shard.method", ShardMethodEnum.LAST_REGISTERED_INDEXING_SHARD.toString()); prop.put("shard.method", ShardMethodEnum.LAST_REGISTERED_INDEXING_SHARD.toString());
return prop; return prop;
} }
} }

View File

@@ -0,0 +1,177 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
@RunWith(MockitoJUnitRunner.class)
public class ExplicitIDRouterTest
{
private DocRouter router;
@Mock
private Acl acl;
@Mock
private Node node;
@Before
public void setUp()
{
router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.EXPLICIT_ID_FALLBACK_LRIS);
}
@Test
public void aclsAreReplicatedAcrossShards()
{
range(0, 100).forEach(index ->
assertTrue(router.routeAcl(randomPositiveInteger(), randomPositiveInteger(), acl)));
}
@Test
public void shardPropertyEmpty_shouldFallBackToLRIS()
{
Node node = new Node();
node.setShardPropertyValue("\n\n\n\n \t\t");
node.setExplicitShardId(4);
assertDataIsDistributedAccordingWithLRISRouting(node);
}
@Test
public void shardPropertyNull_shouldFallBackToLRIS()
{
Node node = new Node();
node.setShardPropertyValue(null);
node.setExplicitShardId(4);
assertDataIsDistributedAccordingWithLRISRouting(node);
}
@Test
public void shardPropertyNaN_shouldFallBackToDBID()
{
Node node = new Node();
node.setShardPropertyValue("This is not a valid Number that can be used as shard ID.");
node.setExplicitShardId(4);
assertDataIsDistributedAccordingWithLRISRouting(node);
}
@Test
public void dynamicAndStaticShardPropertyIsNotValid_shouldRejectTheNode()
{
Node node = new Node();
node.setShardPropertyValue("This is not a valid Number that can be used as shard ID.");
node.setExplicitShardId(null);
assertFalse(router.routeNode(randomShardCountGreaterThanOne(), randomPositiveInteger(), node));
}
@Test
public void explicitShardMatchesShardInstance()
{
int shardCount = 2;
int firstShardInstance = 0;
int secondShardInstance = 1;
Node prototypeNodeOnFirstShard = new Node();
prototypeNodeOnFirstShard.setShardPropertyValue(String.valueOf(firstShardInstance));
Node prototypeNodeOnSecondShard = new Node();
prototypeNodeOnSecondShard.setShardPropertyValue(String.valueOf(secondShardInstance));
int howManyDocumentsPerShard = 1000;
Map<Integer, Integer> nodeDistributionMap = new HashMap<>();
range(0,2).forEach(shardId ->
range(0, howManyDocumentsPerShard)
.forEach(index -> {
if (router.routeNode(shardCount, shardId, prototypeNodeOnFirstShard))
{
nodeDistributionMap.merge(shardId, 1, Integer::sum);
}
if (router.routeNode(shardCount, shardId, prototypeNodeOnSecondShard))
{
nodeDistributionMap.merge(shardId, 1, Integer::sum);
}
}));
assertEquals(shardCount, nodeDistributionMap.size());
assertEquals(howManyDocumentsPerShard, nodeDistributionMap.get(firstShardInstance).intValue());
assertEquals(howManyDocumentsPerShard, nodeDistributionMap.get(secondShardInstance).intValue());
}
/**
 * The {@code node} field (declared outside this excerpt, stubbed here through Mockito) reports
 * a random positive integer as its shard property value. Asking the router on behalf of the
 * next shard instance (that value plus one) must yield false.
 */
@Test
public void explicitShardDoesntMatchShardInstance()
{
    int shardCount = randomShardCountGreaterThanOne();
    int declaredShard = randomPositiveInteger();
    when(node.getShardPropertyValue()).thenReturn(String.valueOf(declaredShard));

    int requestingShard = declaredShard + 1;

    assertFalse(router.routeNode(shardCount, requestingShard, node));
}
/**
 * Offers the given node to each of 15 shard instances (ids 0..14), 150000 times in a row,
 * counting per shard how many times the router accepted it.
 * Afterwards it verifies that only one shard ever accepted the node and that this shard,
 * looked up through the node's explicit shard id, collected all 150000 acceptances.
 *
 * @param node the prototype node handed unchanged to the router on every iteration.
 */
private void assertDataIsDistributedAccordingWithLRISRouting(Node node)
{
    int [] shardIdentifiers = range(0, 15).toArray();
    int shardCount = shardIdentifiers.length;
    int howManyDocumentsPerShard = 10000;
    int totalDocuments = shardCount * howManyDocumentsPerShard;

    Map<Integer, Integer> acceptedPerShard = new HashMap<>();

    for (int document = 0; document < totalDocuments; document++)
    {
        // Shards are still asked one after the other, in ascending order.
        stream(shardIdentifiers)
            .filter(shardId -> router.routeNode(shardCount, shardId, node))
            .forEach(shardId -> acceptedPerShard.merge(shardId, 1, Integer::sum));
    }

    assertEquals(1, acceptedPerShard.size());

    // Asserts that all documents have been assigned to the explicit shard ID.
    assertEquals(totalDocuments, acceptedPerShard.get(node.getExplicitShardId()).intValue());
}
}

View File

@@ -0,0 +1,180 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import org.alfresco.repo.index.shard.ShardMethodEnum;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
/**
 * Tests the {@link DocRouter} returned by {@link DocRouterFactory} for
 * {@link ShardMethodEnum#EXPLICIT_ID}, built with an empty set of properties.
 *
 * NOTE(review): the class name mentions LRIS, while every fallback scenario below asserts a
 * DBID-based distribution - confirm that the name matches the router under test.
 */
@RunWith(MockitoJUnitRunner.class)
public class ExplicitIDWithLRISRouterTest
{
    private DocRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    /** Obtains a fresh router from the factory before each test. */
    @Before
    public void setUp()
    {
        router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.EXPLICIT_ID);
    }

    /** Whatever the (random) shard count and shard instance are, an ACL must be accepted. */
    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        range(0, 100).forEach(attempt -> {
            int shardCount = randomPositiveInteger();
            int shardInstance = randomPositiveInteger();
            assertTrue(router.routeAcl(shardCount, shardInstance, acl));
        });
    }

    /** A whitespace-only shard property value must lead to an even, DBID-driven distribution. */
    @Test
    public void shardPropertyEmpty_shouldFallBackToDBID()
    {
        assertDataIsDistributedAccordingWithDBIDRouting(prototypeWithShardProperty("\n\n\n\n \t\t"));
    }

    /** A null shard property value must lead to an even, DBID-driven distribution. */
    @Test
    public void shardPropertyNull_shouldFallBackToDBID()
    {
        assertDataIsDistributedAccordingWithDBIDRouting(prototypeWithShardProperty(null));
    }

    /** A non-numeric shard property value must lead to an even, DBID-driven distribution. */
    @Test
    public void shardPropertyNaN_shouldFallBackToDBID()
    {
        assertDataIsDistributedAccordingWithDBIDRouting(
                prototypeWithShardProperty("This is not a valid Number that can be used as shard ID."));
    }

    /**
     * Two-shard scenario: one prototype node declares "0" as shard property value, the other "1".
     * Both are offered 1000 times to each shard instance; each shard must end up with exactly
     * 1000 acceptances.
     */
    @Test
    public void explicitShardMatchesShardInstance()
    {
        int shardCount = 2;
        int firstShardInstance = 0;
        int secondShardInstance = 1;
        int howManyDocumentsPerShard = 1000;

        Node prototypeNodeOnFirstShard = prototypeWithShardProperty(String.valueOf(firstShardInstance));
        Node prototypeNodeOnSecondShard = prototypeWithShardProperty(String.valueOf(secondShardInstance));

        Map<Integer, Integer> acceptedPerShard = new HashMap<>();

        for (int shardId = 0; shardId < 2; shardId++)
        {
            for (int iteration = 0; iteration < howManyDocumentsPerShard; iteration++)
            {
                if (router.routeNode(shardCount, shardId, prototypeNodeOnFirstShard))
                {
                    acceptedPerShard.merge(shardId, 1, Integer::sum);
                }

                if (router.routeNode(shardCount, shardId, prototypeNodeOnSecondShard))
                {
                    acceptedPerShard.merge(shardId, 1, Integer::sum);
                }
            }
        }

        assertEquals(shardCount, acceptedPerShard.size());
        assertEquals(howManyDocumentsPerShard, acceptedPerShard.get(firstShardInstance).intValue());
        assertEquals(howManyDocumentsPerShard, acceptedPerShard.get(secondShardInstance).intValue());
    }

    /**
     * The mocked node declares a random positive integer as shard property value; a request
     * made on behalf of the next shard instance (value plus one) must be answered with false.
     */
    @Test
    public void explicitShardDoesntMatchShardInstance()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int declaredShard = randomPositiveInteger();
        when(node.getShardPropertyValue()).thenReturn(String.valueOf(declaredShard));

        int requestingShard = declaredShard + 1;

        assertFalse(router.routeNode(shardCount, requestingShard, node));
    }

    /**
     * Creates a prototype node carrying only the given shard property value.
     * The DBID is deliberately left unset: the distribution check assigns it on every iteration,
     * which is what allows the same instance to be reused as a prototype.
     *
     * @param shardPropertyValue the value to set, possibly null.
     * @return the newly created node.
     */
    private Node prototypeWithShardProperty(String shardPropertyValue)
    {
        Node prototype = new Node();
        prototype.setShardPropertyValue(shardPropertyValue);
        return prototype;
    }

    /**
     * Assigns the ids 0..149999 in turn to the given node and, for each id, offers the node to
     * 15 shard instances (0..14), counting the acceptances per shard.
     * It then requires that all 15 shards received something and that the standard deviation of
     * the per-shard counters, expressed as a percentage of 10000, stays below 30.
     *
     * @param node the prototype node; its id is overwritten on every iteration.
     */
    private void assertDataIsDistributedAccordingWithDBIDRouting(Node node)
    {
        int [] shardIdentifiers = range(0, 15).toArray();
        int shardCount = shardIdentifiers.length;
        int howManyDocumentsPerShard = 10000;
        long totalDocuments = shardCount * howManyDocumentsPerShard;

        Map<Integer, Integer> acceptedPerShard = new HashMap<>();

        for (long dbid = 0; dbid < totalDocuments; dbid++)
        {
            node.setId(dbid);

            // Shards are still asked one after the other, in ascending order.
            stream(shardIdentifiers)
                .filter(shardId -> router.routeNode(shardCount, shardId, node))
                .forEach(shardId -> acceptedPerShard.merge(shardId, 1, Integer::sum));
        }

        double [] counters = acceptedPerShard.values().stream().mapToDouble(Number::doubleValue).toArray();
        double deviation = new StandardDeviation().evaluate(counters);
        double norm = deviation/(howManyDocumentsPerShard) * 100;

        assertEquals(shardIdentifiers.length, acceptedPerShard.size());

        // Asserts the standard deviation of the distribution map is, in percentage, less than 30%
        assertTrue(
                acceptedPerShard.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%",
                norm < 30);
    }
}

View File

@@ -0,0 +1,150 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import static java.util.stream.IntStream.range;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link ExplicitShardIdWithDynamicPropertyRouter}.
 *
 * Tests prefixed with "standaloneMode" use the router created through the no-arg constructor
 * and expect false when the shard property value cannot be used; tests prefixed with
 * "composableMode" create the router with {@code false} as constructor argument and expect
 * null in the same situations.
 */
@RunWith(MockitoJUnitRunner.class)
public class ExplicitShardIdWithDynamicPropertyRouterTest
{
    private static final String NOT_A_NUMBER = "This is not a Number";
    private static final String BLANK_VALUE = " \t\t\t \n\n";

    private ExplicitShardIdWithDynamicPropertyRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    /** Creates the router through its no-arg constructor before each test. */
    @Before
    public void setUp()
    {
        router = new ExplicitShardIdWithDynamicPropertyRouter();
    }

    /** Whatever the (random) shard count and shard instance are, an ACL must be accepted. */
    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        range(0, 100).forEach(attempt -> {
            int shardCount = randomPositiveInteger();
            int shardInstance = randomPositiveInteger();
            assertTrue(router.routeAcl(shardCount, shardInstance, acl));
        });
    }

    /** Default router, non-numeric shard property value: false is expected. */
    @Test
    public void standaloneModeShardPropertyNaN_shouldntAcceptNode()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(NOT_A_NUMBER);

        assertFalse(router.routeNode(shardCount, shardInstance, node));
    }

    /** Router built with {@code false}, non-numeric shard property value: null is expected. */
    @Test
    public void composableModeShardPropertyNaN_shouldntAcceptNode()
    {
        router = new ExplicitShardIdWithDynamicPropertyRouter(false);
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(NOT_A_NUMBER);

        assertNull(router.routeNode(shardCount, shardInstance, node));
    }

    /** Default router, null shard property value: false is expected. */
    @Test
    public void standaloneModeShardPropertyValueIsNull_shouldntAcceptTheNode()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(null);

        assertFalse(router.routeNode(shardCount, shardInstance, node));
    }

    /** Router built with {@code false}, null shard property value: null is expected. */
    @Test
    public void composableModeShardPropertyValueIsNull_shouldRejectTheRequest()
    {
        router = new ExplicitShardIdWithDynamicPropertyRouter(false);
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(null);

        assertNull(router.routeNode(shardCount, shardInstance, node));
    }

    /** Default router, whitespace-only shard property value: false is expected. */
    @Test
    public void standaloneModeShardPropertyValueIsEmpty_shouldntAcceptNode()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(BLANK_VALUE);

        assertFalse(router.routeNode(shardCount, shardInstance, node));
    }

    /** Router built with {@code false}, whitespace-only shard property value: null is expected. */
    @Test
    public void composableModeShardPropertyValueIsEmpty_shouldRejectTheRequest()
    {
        router = new ExplicitShardIdWithDynamicPropertyRouter(false);
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(BLANK_VALUE);

        assertNull(router.routeNode(shardCount, shardInstance, node));
    }

    /** The shard property value equals the requesting shard instance: true is expected. */
    @Test
    public void explicitShardMatchesShardInstance()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubShardPropertyValue(String.valueOf(shardInstance));

        assertTrue(router.routeNode(shardCount, shardInstance, node));
    }

    /** The requesting shard instance is the declared one plus one: false is expected. */
    @Test
    public void explicitShardDoesntMatchShardInstance()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int declaredShard = randomPositiveInteger();
        stubShardPropertyValue(String.valueOf(declaredShard));

        int requestingShard = declaredShard + 1;

        assertFalse(router.routeNode(shardCount, requestingShard, node));
    }

    /**
     * Makes the mocked node report the given shard property value.
     *
     * @param value the value returned by {@code node.getShardPropertyValue()}, possibly null.
     */
    private void stubShardPropertyValue(String value)
    {
        when(node.getShardPropertyValue()).thenReturn(value);
    }
}

View File

@@ -0,0 +1,106 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.stream.IntStream.range;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
/**
 * Unit tests for {@link ExplicitShardIdWithStaticPropertyRouter}.
 *
 * The router is normally created through its no-arg constructor ("standalone mode" in the test
 * names); the "composableMode" test creates it with {@code false} as constructor argument.
 */
@RunWith(MockitoJUnitRunner.class)
public class ExplicitShardIdWithStaticPropertyRouterTest
{
    private ExplicitShardIdWithStaticPropertyRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    /** Creates the router through its no-arg constructor before each test. */
    @Before
    public void setUp()
    {
        router = new ExplicitShardIdWithStaticPropertyRouter();
    }

    /** Whatever the (random) shard count and shard instance are, an ACL must be accepted. */
    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        range(0, 100).forEach(attempt -> {
            int shardCount = randomPositiveInteger();
            int shardInstance = randomPositiveInteger();
            assertTrue(router.routeAcl(shardCount, shardInstance, acl));
        });
    }

    /** Default router, null explicit shard id: false is expected. */
    @Test
    public void standaloneModeExplicitShardIdIsNull_shouldReturnFalse()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubExplicitShardId(null);

        assertFalse(router.routeNode(shardCount, shardInstance, node));
    }

    /**
     * Router built with {@code false}, null explicit shard id: null is expected.
     *
     * NOTE(review): the method name says "shouldReturnFalse" while the assertion checks for
     * null - confirm which of the two reflects the intended contract.
     */
    @Test
    public void composableModeExplicitShardIdIsNull_shouldReturnFalse()
    {
        router = new ExplicitShardIdWithStaticPropertyRouter(false);
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubExplicitShardId(null);

        assertNull(router.routeNode(shardCount, shardInstance, node));
    }

    /** The explicit shard id equals the requesting shard instance: true is expected. */
    @Test
    public void explicitShardMatchesShardInstance()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();
        stubExplicitShardId(shardInstance);

        assertTrue(router.routeNode(shardCount, shardInstance, node));
    }

    /** The requesting shard instance is the explicit shard id plus one: false is expected. */
    @Test
    public void explicitShardDoesntMatchShardInstance()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int declaredShard = randomPositiveInteger();
        stubExplicitShardId(declaredShard);

        int requestingShard = declaredShard + 1;

        assertFalse(router.routeNode(shardCount, requestingShard, node));
    }

    /**
     * Makes the mocked node report the given explicit shard id.
     *
     * @param explicitShardId the value returned by {@code node.getExplicitShardId()}, possibly null.
     */
    private void stubExplicitShardId(Integer explicitShardId)
    {
        when(node.getExplicitShardId()).thenReturn(explicitShardId);
    }
}

View File

@@ -0,0 +1,243 @@
/*
* Copyright (C) 2005-2014 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.solr.tracker;
import static java.util.Arrays.stream;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger;
import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne;
import org.alfresco.solr.client.Acl;
import org.alfresco.solr.client.Node;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import java.util.HashMap;
import java.util.Map;
/**
 * Unit tests for {@link PropertyRouter}.
 *
 * Each test works on a router whose {@code fallback} field has been replaced with a mocked
 * {@link DBIDRouter}, so that delegation to the fallback can be verified.
 */
@RunWith(MockitoJUnitRunner.class)
public class PropertyRouterTest
{
    private PropertyRouter router;

    @Mock
    private Acl acl;

    @Mock
    private Node node;

    @Mock
    private DBIDRouter fallback;

    /** Creates a router without any pattern and plugs the mocked fallback into it. */
    @Before
    public void setUp()
    {
        router = new PropertyRouter(null);
        router.fallback = fallback;
    }

    /** A negative shard count must make the router accept both ACLs and nodes. */
    @Test
    public void negativeShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int negativeShardCount = -14;

        assertTrue(router.routeAcl(negativeShardCount, 1, acl));
        assertTrue(router.routeNode(negativeShardCount, 1, node));
    }

    /** A shard count of zero must make the router accept both ACLs and nodes. */
    @Test
    public void zeroShardCount_shouldAlwaysReturnTrue()
    {
        // Should never happen
        int zeroShardCount = 0;

        assertTrue(router.routeAcl(zeroShardCount, 1, acl));
        assertTrue(router.routeNode(zeroShardCount, 1, node));
    }

    /**
     * A cluster composed of one shard only: that shard must accept both ACLs and nodes.
     * The shard count used here is 1; the previous version of this test used 0 and therefore
     * duplicated {@link #zeroShardCount_shouldAlwaysReturnTrue()} without covering this case.
     */
    @Test
    public void oneShardInTheCluster_shouldAlwaysReturnTrue()
    {
        int oneShardCount = 1;

        assertTrue(router.routeAcl(oneShardCount, 1, acl));
        assertTrue(router.routeNode(oneShardCount, 1, node));
    }

    /** Whatever the (random) shard count and shard instance are, an ACL must be accepted. */
    @Test
    public void aclsAreReplicatedAcrossShards()
    {
        range(0, 100).forEach(index -> assertTrue(router.routeAcl(randomShardCountGreaterThanOne(), randomPositiveInteger(), acl)));
    }

    /** No pattern and no shard property value: the request must reach the fallback router. */
    @Test
    public void ifPropertyAndPatternAreNull_shouldFallbackToDbId()
    {
        assertNull(router.pattern);
        when(node.getShardPropertyValue()).thenReturn(null);

        assertRoutingIsDelegatedToFallback();
    }

    /** The shard property value doesn't match the pattern: the request must reach the fallback router. */
    @Test
    public void propertyDoesntMatchPattern_shouldFallbackToDbId()
    {
        router = new PropertyRouter("([0-9]*)_");
        router.fallback = fallback;
        when(node.getShardPropertyValue()).thenReturn("This value doesn't contain any number");

        assertRoutingIsDelegatedToFallback();
    }

    /**
     * The shard property value is matched by the pattern, but (as the test name states) the match
     * produces nothing usable: the request must reach the fallback router.
     */
    @Test
    public void propertyMatchesPatternButDoesntProduceAnyMatching_shouldFallbackToDbId()
    {
        router = new PropertyRouter("(0-9)*_.*");
        router.fallback = fallback;
        when(node.getShardPropertyValue()).thenReturn("pippo_pluto");

        assertRoutingIsDelegatedToFallback();
    }

    /**
     * Empty or whitespace-only pattern strings must leave the router without a pattern; with a
     * null shard property value the request must then reach the fallback router.
     */
    @Test
    public void propertyIsNullAndPatternIsEmpty_shouldFallbackToDbId()
    {
        String [] tests = {"", " ", "\t\t\t", "\n\n"};

        stream(tests).forEach(test -> {
            router = new PropertyRouter(test);
            router.fallback = fallback;
            assertNull(router.pattern);
            when(node.getShardPropertyValue()).thenReturn(null);

            assertRoutingIsDelegatedToFallback();

            // Clears stubbing and recorded interactions, so each pattern is verified in isolation.
            reset(node, fallback);
        });
    }

    /**
     * Without a pattern, nodes carrying a random positive integer as shard property value
     * must be spread evenly enough across 15 shards.
     */
    @Test
    public void onlyPatternIsNull_shouldBalanceNodesOnShardProperty()
    {
        int [] shardIdentifiers = range(0,15).toArray();
        int howManyDocumentsPerShard = 10000;

        Map<Integer, Integer> nodeDistributionMap =
                routeNodesWithRandomShardProperty(shardIdentifiers, shardIdentifiers.length * howManyDocumentsPerShard, "");

        assertNodesAreBalanced(nodeDistributionMap, shardIdentifiers.length, howManyDocumentsPerShard);
    }

    /**
     * With the pattern {@code ([0-9]*)(_)}, nodes whose shard property value is a random positive
     * integer followed by "_ignoreThisPart" must be spread evenly enough across 15 shards.
     */
    @Test
    public void propertyAndPatternArentNull_shouldBalanceNodesOnShardProperty()
    {
        int [] shardIdentifiers = range(0,15).toArray();
        int howManyDocumentsPerShard = 100000;

        router = new PropertyRouter("([0-9]*)(_)");
        router.fallback = fallback;

        Map<Integer, Integer> nodeDistributionMap =
                routeNodesWithRandomShardProperty(shardIdentifiers, shardIdentifiers.length * howManyDocumentsPerShard, "_ignoreThisPart");

        assertNodesAreBalanced(nodeDistributionMap, shardIdentifiers.length, howManyDocumentsPerShard);
    }

    /**
     * Routes the mocked node using a random shard count (greater than one) and a random shard
     * instance, then verifies the very same request has been forwarded to the fallback router.
     */
    private void assertRoutingIsDelegatedToFallback()
    {
        int shardCount = randomShardCountGreaterThanOne();
        int shardInstance = randomPositiveInteger();

        router.routeNode(shardCount, shardInstance, node);

        verify(fallback).routeNode(shardCount, shardInstance, node);
    }

    /**
     * Creates the requested number of nodes, each one with a shard property value composed of a
     * random positive integer followed by the given suffix, and offers every node to all the
     * given shard instances.
     *
     * @param shardIdentifiers the shard instances; the array length is used as shard count.
     * @param howManyNodes how many nodes are created and routed.
     * @param propertySuffix the text appended to the random number (empty for no suffix).
     * @return for each shard instance that accepted at least one node, the number of acceptances.
     */
    private Map<Integer, Integer> routeNodesWithRandomShardProperty(int [] shardIdentifiers, int howManyNodes, String propertySuffix)
    {
        int shardCount = shardIdentifiers.length;
        Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

        for (int index = 0; index < howManyNodes; index++)
        {
            Node candidate = new Node();
            candidate.setShardPropertyValue(randomPositiveInteger() + propertySuffix);

            for (int shardId : shardIdentifiers)
            {
                if (router.routeNode(shardCount, shardId, candidate))
                {
                    nodeDistributionMap.merge(shardId, 1, Integer::sum);
                }
            }
        }

        return nodeDistributionMap;
    }

    /**
     * Requires that every shard received at least one node and that the standard deviation of the
     * per-shard counters, expressed as a percentage of the expected documents per shard, is below 30.
     *
     * @param nodeDistributionMap the per-shard acceptance counters.
     * @param expectedShards how many shards must appear in the map.
     * @param howManyDocumentsPerShard the expected number of documents per shard, used to normalise the deviation.
     */
    private void assertNodesAreBalanced(Map<Integer, Integer> nodeDistributionMap, int expectedShards, int howManyDocumentsPerShard)
    {
        assertEquals(expectedShards, nodeDistributionMap.size());

        StandardDeviation sd = new StandardDeviation();
        double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray());
        double norm = (deviation/howManyDocumentsPerShard) * 100;

        // Asserts the standard deviation of the distribution map is, in percentage, less than 30%
        assertTrue(
                nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%",
                norm < 30);
    }
}