diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDModRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDModRouter.java index 9fa4b8de9..408b33317 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDModRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDModRouter.java @@ -18,31 +18,45 @@ */ package org.alfresco.solr.tracker; -/* - * @author Joel - */ - import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.Node; +/** + * Nodes and access control lists are grouped by their ACL ID. + * This places the nodes together with all the access control information required to determine the access to a node in the same shard. + * Both the nodes and access control information are sharded. The overall index size will be smaller than other methods as the ACL index information is not duplicated in every shard. + * Also, the ACL count is usually much smaller than the node count. + * + * This method is beneficial if you have lots of ACLs and the documents are evenly distributed over those ACLs. + * For example, if you have many Share sites, nodes and ACLs are assigned to shards randomly based on the ACL and the documents to which it applies. + * + * The node distribution may be uneven as it depends on how many nodes share ACLs. 
+ * To use this method, when creating a shard add a new configuration property: + * + * + * + * @see Search Services sharding methods + */ public class ACLIDModRouter implements DocRouter { @Override - public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { - if(shardCount <= 1) { - return true; - } - - return acl.getId() % shardCount == shardInstance; + public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) + { + return shardCount <= 1 || route(acl.getId(), shardCount, shardInstance); } @Override - public boolean routeNode(int shardCount, int shardInstance, Node node) { - if(shardCount <= 1) { - return true; - } + public Boolean routeNode(int shardCount, int shardInstance, Node node) + { + return (shardCount <= 1) || route(node.getAclId() , shardCount, shardInstance); + } - //Route the node based on the mod of the aclId - return node.getAclId() % shardCount == shardInstance; + private boolean route(long id, int shardCount, int shardInstance) + { + return id % shardCount == shardInstance; } } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDMurmurRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDMurmurRouter.java index 44b05a141..004ecc1ee 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDMurmurRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ACLIDMurmurRouter.java @@ -22,31 +22,35 @@ import org.apache.solr.common.util.Hash; import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Acl; - -/* - * @author Joel +/** + * Nodes are evenly distributed over the shards at random based on the murmur hash of the ACL ID. 
+ * To use this method, when creating a shard add a new configuration property: + * + * + * + * @see Search Services sharding methods */ - public class ACLIDMurmurRouter implements DocRouter { @Override - public boolean routeAcl(int numShards, int shardInstance, Acl acl) { - if(numShards <= 1) { - return true; - } - - String s = Long.toString(acl.getId()); - return (Math.abs(Hash.murmurhash3_x86_32(s, 0, s.length(), 77)) % numShards) == shardInstance; + public Boolean routeAcl(int numShards, int shardInstance, Acl acl) + { + return (numShards <= 1) || route(acl.getId(), numShards, shardInstance); } @Override - public boolean routeNode(int numShards, int shardInstance, Node node) { - if(numShards <= 1) { - return true; - } + public Boolean routeNode(int numShards, int shardInstance, Node node) + { + return (numShards <= 1) || route(node.getAclId(), numShards, shardInstance); + } - //Route the node based on the murmur hash of the aclId - String s = Long.toString(node.getAclId()); - return (Math.abs(Hash.murmurhash3_x86_32(s, 0, s.length(), 77)) % numShards) == shardInstance; + private boolean route(long id, int numShards, int shardInstance) + { + String value = Long.toString(id); + return (Math.abs(Hash.murmurhash3_x86_32(value, 0, value.length(), 77)) % numShards) == shardInstance; } } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ComposableDocRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ComposableDocRouter.java new file mode 100644 index 000000000..80d02cd0b --- /dev/null +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ComposableDocRouter.java @@ -0,0 +1,69 @@ +package org.alfresco.solr.tracker; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Composable {@link DocRouter} is a document router that can be used standalone or nested in a primary-fallback + * composite document routing strategy. 
+ * The main reason why we need this marker supertype is that the return value is different depending on how the + * document router is used: + * + * + * + * @author agazzarini + */ +public abstract class ComposableDocRouter implements DocRouter +{ + protected final Logger logger = LoggerFactory.getLogger(getClass()); + private final boolean isRunningInStandaloneModeOrIsLeaf; + + /** + * Builds a doc router instance with the given mode (standalone or not). + * + * @param standaloneOrLeafMode a flag indicating the active mode of this router. + */ + ComposableDocRouter(boolean standaloneOrLeafMode) + { + this.isRunningInStandaloneModeOrIsLeaf = standaloneOrLeafMode; + } + + ComposableDocRouter() + { + this(true); + } + + /** + * Properly handles the return value of this doc router. + * The return value is different depending on how the document router is used: + * + * + * @return false if this router runs in standalone/leaf mode, null otherwise. + */ + Boolean negativeReturnValue() + { + return isRunningInStandaloneModeOrIsLeaf ? false : null; + } + + protected void debug(String message, Object ... 
params) + { + if (logger.isDebugEnabled()) + { + logger.debug(message, params); + } + } +} diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRangeRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRangeRouter.java index 0e42a423d..5f69d0b6b 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRangeRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRangeRouter.java @@ -18,18 +18,32 @@ */ package org.alfresco.solr.tracker; -import org.apache.solr.common.util.Hash; import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Acl; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; - -/* - * @author Joel +/** + * This routes documents within specific DBID ranges to specific shards. + * It adds new shards to the cluster without requiring a reindex. + * The access control information is duplicated in each shard. + * DBID range sharding is the only option to offer auto-scaling as opposed to defining your exact shard count at the start. + * All the other sharding methods require repartitioning in some way. + * + * For each shard, you specify the range of DBIDs to be included. As your repository grows you can add shards. 
+ * + * To use this method, when creating a shard add a new configuration property: + * + * + * + * @author joel + * @see Search Services sharding methods */ - public class DBIDRangeRouter implements DocRouter { private long startRange; @@ -37,52 +51,57 @@ public class DBIDRangeRouter implements DocRouter private AtomicBoolean expanded = new AtomicBoolean(false); private AtomicBoolean initialized = new AtomicBoolean(false); - public DBIDRangeRouter(long startRange, long endRange) { + public DBIDRangeRouter(long startRange, long endRange) + { this.startRange = startRange; this.expandableRange = new AtomicLong(endRange); } - public void setEndRange(long endRange) { + public void setEndRange(long endRange) + { expandableRange.set(endRange); } - public void setExpanded(boolean expanded) { + public void setExpanded(boolean expanded) + { this.expanded.set(expanded); } - public void setInitialized(boolean initialized) { + public void setInitialized(boolean initialized) + { this.initialized.set(initialized); } - public boolean getInitialized() { + public boolean getInitialized() + { return this.initialized.get(); } - public long getEndRange() { + public long getEndRange() + { return expandableRange.longValue(); } - public long getStartRange() { + public long getStartRange() + { return this.startRange; } - public boolean getExpanded() { + public boolean getExpanded() + { return this.expanded.get(); } @Override - public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { - //When routing by DBID range, all acls go to all shards. 
+ public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) + { return true; } @Override - public boolean routeNode(int shardCount, int shardInstance, Node node) { + public Boolean routeNode(int shardCount, int shardInstance, Node node) + { long dbid = node.getId(); - if(dbid >= startRange && dbid < expandableRange.longValue()) { - return true; - } else { - return false; - } + return dbid >= startRange && dbid < expandableRange.longValue(); } } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRouter.java index b69abd949..7922f1d19 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DBIDRouter.java @@ -22,29 +22,42 @@ import org.apache.solr.common.util.Hash; import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Acl; - -/* +/** + * DBID murmur hash based document router. + * This method is available in Alfresco Search Services 1.0 and later versions and is the default sharding option in Solr 6. + * Nodes are evenly distributed over the shards at random based on the murmur hash of the DBID. + * The access control information is duplicated in each shard. + * The distribution of nodes over each shard is very even and shards grow at the same rate. + * Also, this is the fall back method if any other sharding information is unavailable. + * + * To use this method, when creating a shard add a new configuration property: + * + * + * * @author Joel + * @see Search Services sharding methods */ - public class DBIDRouter implements DocRouter { @Override - public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { - //When routing by DBID, all acls go to all shards. 
+ public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) + { return true; } @Override - public boolean routeNode(int shardCount, int shardInstance, Node node) { - - if(shardCount <= 1) { + public Boolean routeNode(int shardCount, int shardInstance, Node node) + { + if(shardCount <= 1) + { return true; } - //Route the node based on nodeId - - String s = Long.toString(node.getId()); - return (Math.abs(Hash.murmurhash3_x86_32(s, 0, s.length(), 77)) % shardCount) == shardInstance; + String dbid = Long.toString(node.getId()); + return (Math.abs(Hash.murmurhash3_x86_32(dbid, 0, dbid.length(), 77)) % shardCount) == shardInstance; } } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateMonthRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateMonthRouter.java index a054bc0f2..b6302b743 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateMonthRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateMonthRouter.java @@ -19,19 +19,44 @@ package org.alfresco.solr.tracker; import org.alfresco.util.ISO8601DateFormat; -import org.apache.solr.common.util.Hash; import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Acl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; -/* -* @author Joel -*/ - +/** + * The date-based sharding assigns dates sequentially through shards based on the month. + * For example: If there are 12 shards, each month would be assigned sequentially to each shard, wrapping round and + * starting again for each year. + * The non-random assignment facilitates easier shard management - dropping shards or scaling out replication for some + * date range. + * Typical ageing strategies could be based on the created date or destruction date. 
+ * + * Each shard contains copies of all the ACL information, so this information is replicated in each shard. + * However, if the property is not present on a node, sharding falls back to the {@link DBIDRouter} to randomly distribute + * these nodes. + * + * To use this method, when creating a shard add the new configuration properties: + * + * + * + * Months can be grouped together, for example, by quarter. Each quarter of data would be assigned sequentially through the available shards. + * + * + * + * @see Search Services sharding methods + */ public class DateMonthRouter implements DocRouter { protected final static Logger log = LoggerFactory.getLogger(DateMonthRouter.class); @@ -43,38 +68,52 @@ public class DateMonthRouter implements DocRouter * Creates a date month router * @param groupparam - the number of months that should be grouped together on a shard before moving to use the next shard in sequence */ - public DateMonthRouter(String groupparam) { - try { + public DateMonthRouter(String groupparam) + { + try + { this.grouping = Integer.parseInt(groupparam); - } catch (NumberFormatException e) { + } + catch (NumberFormatException e) + { log.error("shard.date.grouping needs to be a valid integer.", e); throw e; } } @Override - public boolean routeAcl(int numShards, int shardInstance, Acl acl) { + public Boolean routeAcl(int numShards, int shardInstance, Acl acl) + { return true; } @Override - public boolean routeNode(int numShards, int shardInstance, Node node) { - if(numShards <= 1) { + public Boolean routeNode(int numShards, int shardInstance, Node node) + { + if(numShards <= 1) + { return true; } String ISO8601Date = node.getShardPropertyValue(); - if(ISO8601Date == null) { + if(ISO8601Date == null) + { return dbidRouter.routeNode(numShards, shardInstance, node); } - Date date = ISO8601DateFormat.parse(ISO8601Date); - GregorianCalendar cal = new GregorianCalendar(); - cal.setTime(date); - int month = cal.get(cal.MONTH); - int year = cal.get(cal.YEAR); 
- return ((((year * 12) + month)/grouping) % numShards) == shardInstance; - + try + { + Date date = ISO8601DateFormat.parse(ISO8601Date); + GregorianCalendar cal = new GregorianCalendar(); + cal.setTime(date); + int month = cal.get(Calendar.MONTH); + int year = cal.get(Calendar.YEAR); + return ((((year * 12) + month) / grouping) % numShards) == shardInstance; + } + catch (Exception exception) + { + return dbidRouter.routeNode(numShards, shardInstance, node); + } } } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateQuarterRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateQuarterRouter.java index cedfe416c..8a446c880 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateQuarterRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DateQuarterRouter.java @@ -19,34 +19,42 @@ package org.alfresco.solr.tracker; import org.alfresco.util.ISO8601DateFormat; -import org.apache.solr.common.util.Hash; import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Acl; + +import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; -/* -* @author Joel -*/ - +/** + * This {@link DocRouter} has been deprecated because it is a special case of {@link DateMonthRouter} with a grouping + * parameter equal to 3. 
+ * + * @see DateMonthRouter + * @see Search Services sharding methods + */ +@Deprecated public class DateQuarterRouter implements DocRouter { - public boolean routeAcl(int numShards, int shardInstance, Acl acl) { + @Override + public Boolean routeAcl(int numShards, int shardInstance, Acl acl) + { return true; } - public boolean routeNode(int numShards, int shardInstance, Node node) { - if(numShards <= 1) { + public Boolean routeNode(int numShards, int shardInstance, Node node) + { + if(numShards <= 1) + { return true; } String ISO8601Date = node.getShardPropertyValue(); - //TODO: we can parse the string to make this more efficient rather then creating a calendar. Date date = ISO8601DateFormat.parse(ISO8601Date); - GregorianCalendar cal = new GregorianCalendar(); - cal.setTime(date); - int month = cal.get(cal.MONTH); - int year = cal.get(cal.YEAR); + GregorianCalendar calendar = new GregorianCalendar(); + calendar.setTime(date); + int month = calendar.get(Calendar.MONTH); + int year = calendar.get(Calendar.YEAR); return Math.ceil(((year * 12) + (month+1)) / 3) % numShards == shardInstance; } } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouter.java index 16f0b7990..1370ba440 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouter.java @@ -18,16 +18,44 @@ */ package org.alfresco.solr.tracker; - import org.alfresco.solr.client.Node; import org.alfresco.solr.client.Acl; -/* - * This tracks two things: transactions and metadata nodes +/** + * Defines the logic used for distributing data across the shards. + * A {@link DocRouter} implementor instance is properly configured on each shard. 
+ * Each time an incoming document D arrives at the shard S, the DocRouter (on the S instance) + * will be used for deciding if D needs to be managed (i.e. indexed) by S. + * + * The {@link DocRouter} contract requires a concrete implementor to provide the logic for + * understanding: + * + *
  • + * + * + *
  • + * * @author Joel */ public interface DocRouter { - public boolean routeAcl(int shardCount, int shardInstance, Acl acl); - public boolean routeNode(int shardCount, int shardInstance, Node node); + /** + * Checks if the incoming ACL document must be indexed on this shard. + * + * @param shardCount the total shard count. + * @param shardInstance the owning shard instance (i.e. instance number). + * @param acl the ACL. + * @return true if the ACL must be indexed in the shard which owns this {@link DocRouter} instance, false otherwise. + */ + Boolean routeAcl(int shardCount, int shardInstance, Acl acl); + + /** + * Checks if the incoming Node must be indexed on this shard. + * + * @param shardCount the total shard count. + * @param shardInstance the owning shard instance (i.e. instance number). + * @param node the {@link Node} instance. + * @return true if the {@link Node} instance must be indexed in the shard which owns this {@link DocRouter} instance, false otherwise. + */ + Boolean routeNode(int shardCount, int shardInstance, Node node); } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterFactory.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterFactory.java index 703a99c33..1399a6316 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterFactory.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterFactory.java @@ -24,55 +24,59 @@ import org.slf4j.LoggerFactory; import java.util.Properties; -/* +/** + * Routing strategy Factory. 
+ * * @author Joel */ - public class DocRouterFactory { - protected final static Logger log = LoggerFactory.getLogger(DocRouterFactory.class); + private final static Logger LOGGER = LoggerFactory.getLogger(DocRouterFactory.class); - public static DocRouter getRouter(Properties properties, ShardMethodEnum method) { - - switch(method) { + public static DocRouter getRouter(Properties properties, ShardMethodEnum method) + { + switch(method) + { case DB_ID: - log.info("Sharding via DB_ID"); + LOGGER.info("Sharding via DB_ID"); return new DBIDRouter(); case DB_ID_RANGE: - // if(properties.containsKey("shard.range")) { - log.info("Sharding via DB_ID_RANGE"); + LOGGER.info("Sharding via DB_ID_RANGE"); String[] pair =properties.getProperty("shard.range").split("-"); long start = Long.parseLong(pair[0]); long end = Long.parseLong(pair[1]); return new DBIDRangeRouter(start, end); } case ACL_ID: - log.info("Sharding via ACL_ID"); + LOGGER.info("Sharding via ACL_ID"); return new ACLIDMurmurRouter(); case MOD_ACL_ID: - log.info("Sharding via MOD_ACL_ID"); + LOGGER.info("Sharding via MOD_ACL_ID"); return new ACLIDModRouter(); case DATE: - log.info("Sharding via DATE"); + LOGGER.info("Sharding via DATE"); return new DateMonthRouter(properties.getProperty("shard.date.grouping", "1")); case PROPERTY: - log.info("Sharding via PROPERTY"); + LOGGER.info("Sharding via PROPERTY"); return new PropertyRouter(properties.getProperty("shard.regex", "")); case LAST_REGISTERED_INDEXING_SHARD: - log.info("Sharding via LAST_REGISTERED_INDEXING_SHARD"); - return new LastRegisteredShardRouter(); + LOGGER.info("Sharding via LAST_REGISTERED_INDEXING_SHARD"); + return new ExplicitShardIdWithStaticPropertyRouter(); case EXPLICIT_ID_FALLBACK_LRIS: - log.info("Sharding via EXPLICIT_ID_FALLBACK_LRIS"); - return new ExplicitRouter(new LastRegisteredShardRouter()); + LOGGER.info("Sharding via EXPLICIT_ID_FALLBACK_LRIS"); + return new DocRouterWithFallback( + new 
ExplicitShardIdWithDynamicPropertyRouter(false), + new ExplicitShardIdWithStaticPropertyRouter()); case EXPLICIT_ID: - log.info("Sharding via EXPLICIT_ID"); - return new ExplicitRouter(new DBIDRouter()); + LOGGER.info("Sharding via EXPLICIT_ID"); + return new DocRouterWithFallback( + new ExplicitShardIdWithDynamicPropertyRouter(false), + new DBIDRouter()); default: - log.info("Sharding via DB_ID (default)"); + LOGGER.warn("WARNING! Unknown/unsupported sharding method ({}). System will fallback to DB_ID", method); return new DBIDRouter(); } } - } \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterWithFallback.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterWithFallback.java new file mode 100644 index 000000000..637ad9e5c --- /dev/null +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/DocRouterWithFallback.java @@ -0,0 +1,48 @@ +package org.alfresco.solr.tracker; + +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; + +import static java.util.Optional.ofNullable; + +/** + * A composable {@link DocRouter} which consists of + * + * + * + * @author agazzarini + */ +public class DocRouterWithFallback implements DocRouter +{ + private final static Logger LOGGER = LoggerFactory.getLogger(ExplicitShardIdWithDynamicPropertyRouter.class); + + private final DocRouter primaryStrategy; + private final DocRouter fallbackStrategy; + + public DocRouterWithFallback(DocRouter primaryStrategy, DocRouter fallbackStrategy) + { + this.primaryStrategy = Objects.requireNonNull(primaryStrategy); + this.fallbackStrategy = Objects.requireNonNull(fallbackStrategy); + } + + @Override + public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) + { + return primaryStrategy.routeAcl(shardCount, shardInstance, acl); + } + + @Override + 
public Boolean routeNode(int shardCount, int shardInstance, Node node) + { + return ofNullable(primaryStrategy.routeNode(shardCount, shardInstance, node)) + .orElseGet(() -> ofNullable(fallbackStrategy.routeNode(shardCount, shardInstance, node)) + .orElse(false)); + } +} diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitRouter.java deleted file mode 100644 index 5ba5c42be..000000000 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitRouter.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2005-2019 Alfresco Software Limited. - * - * This file is part of Alfresco - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- */ -package org.alfresco.solr.tracker; - -import org.alfresco.solr.client.Acl; -import org.alfresco.solr.client.Node; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Routes a document only if the shardInstance matches the provided shardId - */ -public class ExplicitRouter implements DocRouter { - - protected final static Logger log = LoggerFactory.getLogger(ExplicitRouter.class); - private final DocRouter fallbackRouter; - - public ExplicitRouter(DocRouter fallbackRouter) { - this.fallbackRouter = fallbackRouter; - } - - @Override - public boolean routeAcl(int shardCount, int shardInstance, Acl acl) { - //all acls go to all shards. - return true; - } - - @Override - public boolean routeNode(int shardCount, int shardInstance, Node node) { - - String shardBy = node.getShardPropertyValue(); - - if (shardBy != null && !shardBy.isEmpty()) - { - try - { - int shardid = Integer.parseInt(shardBy); - return shardid == shardInstance; - } - catch (NumberFormatException e) - { - if (log.isDebugEnabled()) - { - log.debug("Shard "+shardInstance+" EXPLICIT_ID routing specified but failed to parse a shard property value ("+shardBy+") for node "+node.getNodeRef()); - } - } - } - else - { - if (log.isDebugEnabled()) - { - log.debug("Shard "+shardInstance+" EXPLICIT_ID routing specified but no shard id property found for node "+node.getNodeRef()); - } - } - - if (log.isDebugEnabled()) - { - log.debug("Shard "+shardInstance+" falling back to DBID routing for node "+node.getNodeRef()); - } - return fallbackRouter.routeNode(shardCount, shardInstance, node); - } -} diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitShardIdWithDynamicPropertyRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitShardIdWithDynamicPropertyRouter.java new file mode 100644 index 000000000..ad208df53 --- /dev/null +++ 
b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitShardIdWithDynamicPropertyRouter.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2005-2019 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.solr.tracker; + +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; + +/** + * Routes a document only if the shardInstance matches the provided shardId. + * The access control information is duplicated in each shard. + * The target shard identifier is provided using a (configurable) property of the incoming document. 
+ * + * @author agazzarini + * @see Search Services sharding methods + */ +public class ExplicitShardIdWithDynamicPropertyRouter extends ComposableDocRouter +{ + public ExplicitShardIdWithDynamicPropertyRouter() + { + super(); + } + + public ExplicitShardIdWithDynamicPropertyRouter(boolean isInStandaloneMode) + { + super(isInStandaloneMode); + } + + @Override + public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) + { + return true; + } + + @Override + public Boolean routeNode(int shardCount, int shardInstance, Node node) + { + String shardBy = node.getShardPropertyValue(); + if (shardBy == null || shardBy.trim().length() == 0) + { + debug("Shard {}: EXPLICIT_ID routing specified but no shard id property found for node {}", shardInstance, node.getNodeRef()); + return negativeReturnValue(); + } + + try + { + int shardid = Integer.parseInt(shardBy.trim()); + return shardid == shardInstance; + + } + catch (NumberFormatException exception) + { + debug("Shard {} EXPLICIT_ID routing specified but failed to parse a shard property value ({}) for node {}", + shardInstance, + shardBy, + node.getNodeRef()); + return negativeReturnValue(); + } + } +} diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/LastRegisteredShardRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitShardIdWithStaticPropertyRouter.java similarity index 60% rename from search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/LastRegisteredShardRouter.java rename to search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitShardIdWithStaticPropertyRouter.java index ff109aeb7..62e847029 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/LastRegisteredShardRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/ExplicitShardIdWithStaticPropertyRouter.java @@ -24,39 +24,43 @@ import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; /** - * Routes a document only if the explicitShardId matches the provided shardId + * Routes the incoming nodes (not ACLs!) on the last registered indexing shard (LRIS). + * The access control information is duplicated in each shard. * * @author Elia + * @author agazzarini */ -public class LastRegisteredShardRouter implements DocRouter +public class ExplicitShardIdWithStaticPropertyRouter extends ComposableDocRouter { + private final static Logger log = LoggerFactory.getLogger(ExplicitShardIdWithStaticPropertyRouter.class); - protected final static Logger log = LoggerFactory.getLogger(ExplicitRouter.class); - - public LastRegisteredShardRouter() + public ExplicitShardIdWithStaticPropertyRouter() { + super(); + } + + public ExplicitShardIdWithStaticPropertyRouter(boolean isInStandaloneMode) + { + super(isInStandaloneMode); } @Override - public boolean routeAcl(int shardCount, int shardInstance, Acl acl) + public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) { - //all acls go to all shards. 
return true; } @Override - public boolean routeNode(int shardCount, int shardInstance, Node node) + public Boolean routeNode(int shardCount, int shardInstance, Node node) { - Integer explicitShardId = node.getExplicitShardId(); if (explicitShardId == null) { - log.error("explicitShardId is not set for node " + node.getNodeRef()); - return false; + debug("ExplicitShardId property is not set for node {} ", node.getNodeRef()); + return negativeReturnValue(); } return explicitShardId.equals(shardInstance); - } -} +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/PropertyRouter.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/PropertyRouter.java index 897fcd222..b482d74d3 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/PropertyRouter.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/PropertyRouter.java @@ -30,47 +30,67 @@ import java.util.regex.Pattern; /** * Routes based on a text property field. + * In this method, the value of some property is hashed and this hash is used to assign the node to a random shard. + * All nodes with the same property value will be assigned to the same shard. + * Each shard will duplicate all the ACL information. 
+ * + * To use this method, when creating a shard add the new configuration properties: + * + * + * + * It is possible to extract a part of the property value to use for sharding using a regular expression, + * for example, a year at the start of a string: + * + * * * @author Gethin James + * @see Search Services sharding methods */ public class PropertyRouter implements DocRouter { protected final static Logger log = LoggerFactory.getLogger(PropertyRouter.class); - Pattern pattern = null; + Pattern pattern; //Fallback to DB_ID routing - private DocRouter fallback = DocRouterFactory.getRouter(null, ShardMethodEnum.DB_ID); + DocRouter fallback = DocRouterFactory.getRouter(null, ShardMethodEnum.DB_ID); public PropertyRouter(String propertyRegEx) { - if (propertyRegEx != null && !propertyRegEx.isEmpty()) + if (propertyRegEx != null && propertyRegEx.trim().length() > 0) { - pattern = Pattern.compile(propertyRegEx); + pattern = Pattern.compile(propertyRegEx.trim()); } } @Override - public boolean routeAcl(int shardCount, int shardInstance, Acl acl) + public Boolean routeAcl(int shardCount, int shardInstance, Acl acl) { return true; } @Override - public boolean routeNode(int shardCount, int shardInstance, Node node) + public Boolean routeNode(int shardCount, int shardInstance, Node node) { if(shardCount <= 1) { return true; } - String shardBy = node.getShardPropertyValue(); + String shardBy = node.getShardPropertyValue(); if (shardBy !=null && pattern != null) { try { Matcher matcher = pattern.matcher(shardBy); - if (matcher.find() && !matcher.group(1).isEmpty()) + if (matcher.find() && matcher.groupCount() >= 1 && !matcher.group(1).isEmpty()) { shardBy = matcher.group(1); } diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/AlfrescoSolrUtils.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/AlfrescoSolrUtils.java index 5d129568d..174939124 100644 ---
a/search-services/alfresco-search/src/test/java/org/alfresco/solr/AlfrescoSolrUtils.java +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/AlfrescoSolrUtils.java @@ -65,6 +65,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Random; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -104,14 +105,16 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.CommitUpdateCommand; /** - * Alfresco Solr Utility class which provide helper methods. - * @author Michael Suzuki + * Alfresco Solr Test Utility class which provides helper methods. * + * @author Michael Suzuki + * @author Andrea Gazzarini */ public class AlfrescoSolrUtils { public static final String TEST_NAMESPACE = "http://www.alfresco.org/test/solrtest"; public static long MAX_WAIT_TIME = 80000; + public static Random RANDOMIZER = new Random(); /** * Get transaction. @@ -141,6 +144,25 @@ public class AlfrescoSolrUtils return transaction; } + /** + * Returns a pseudo-random number of shards always greater than 1. + * + * @return a pseudo-random number of shards always greater than 1. + */ + public static int randomShardCountGreaterThanOne() + { + return randomPositiveInteger() + 2; + } + + /** + * Returns a pseudo-random integer between 0 (inclusive) and 100 (exclusive). + * + * @return a pseudo-random integer between 0 (inclusive) and 100 (exclusive). + */ + public static int randomPositiveInteger() + { + return RANDOMIZER.nextInt(100); + } /** * Get a node.
diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ACLIDMurmurRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ACLIDMurmurRouterTest.java new file mode 100644 index 000000000..d670b2eca --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ACLIDMurmurRouterTest.java @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ */ +package org.alfresco.solr.tracker; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +@RunWith(MockitoJUnitRunner.class) +public class ACLIDMurmurRouterTest +{ + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.ACL_ID); + } + + @Test + public void negativeShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int negativeShardCount = -14; + + assertTrue(router.routeAcl(negativeShardCount, 1, acl)); + assertTrue(router.routeNode(negativeShardCount, 1, node)); + } + + @Test + public void zeroShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int zeroShardCount = 0; + + assertTrue(router.routeAcl(zeroShardCount, 1, acl)); + assertTrue(router.routeNode(zeroShardCount, 1, node)); + } + + @Test + public void oneShardInTheCluster_shouldAlwaysReturnTrue() + { + // A single-shard cluster must accept every ACL and node + int oneShardCount = 1; + + assertTrue(router.routeAcl(oneShardCount, 1, acl)); + assertTrue(router.routeNode(oneShardCount, 1, node)); + } + + @Test + public void sevenShardsInTheCluster_shouldBalanceNodesAndAcls() + { + int [] shardIdentifiers = range(0,7).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 1000; + + // Maps used
for validating the data distribution + Map aclDistributionMap = new HashMap<>(); + Map nodeDistributionMap = new HashMap<>(); + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> { + Acl acl = new Acl(randomPositiveInteger(), id); + Node node = new Node(); + node.setAclId(acl.getId()); + + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeAcl(shardCount, shardId, acl)) + { + aclDistributionMap.merge(shardId, 1, Integer::sum); + } + + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + StandardDeviation sd = new StandardDeviation(); + double aclsDeviation = sd.evaluate(aclDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + double nodesDeviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + assertEquals(shardIdentifiers.length, aclDistributionMap.size()); + + // Asserts the standard deviation of the distribution map is in percentage lesser than 30% + assertTrue(aclDistributionMap.values().toString() + ", SD = " + aclsDeviation, aclsDeviation/(howManyDocumentsPerShard) * 100 < 30); + assertTrue(nodeDistributionMap.values().toString() + ", SD = " + nodesDeviation,nodesDeviation/(howManyDocumentsPerShard) * 100 < 30); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/AclModCountRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/AclModCountRouterTest.java new file mode 100644 index 000000000..c903cd8f3 --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/AclModCountRouterTest.java @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. 
+ * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.solr.tracker; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.when; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +@RunWith(MockitoJUnitRunner.class) +public class AclModCountRouterTest +{ + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.MOD_ACL_ID); + } + + @Test + public void negativeShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int negativeShardCount = -14; + + assertTrue(router.routeAcl(negativeShardCount, 1, acl)); + assertTrue(router.routeNode(negativeShardCount, 1, node)); + } + + @Test + public void zeroShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int 
zeroShardCount = 0; + + assertTrue(router.routeAcl(zeroShardCount, 1, acl)); + assertTrue(router.routeNode(zeroShardCount, 1, node)); + } + + @Test + public void oneShardInTheCluster_shouldAlwaysReturnTrue() + { + // A single-shard cluster must accept every ACL and node + int oneShardCount = 1; + + assertTrue(router.routeAcl(oneShardCount, 1, acl)); + assertTrue(router.routeNode(oneShardCount, 1, node)); + } + + @Test + public void sevenShardsInTheCluster_shouldBalanceNodesAndAcls() + { + int [] shardIdentifiers = range(0,7).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 100; + + // Maps used for validating the data distribution + Map aclDistributionMap = new HashMap<>(); + Map nodeDistributionMap = new HashMap<>(); + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> { + when(acl.getId()).thenReturn(id); + when(node.getAclId()).thenReturn(id); + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeAcl(shardCount, shardId, acl)) + { + aclDistributionMap.merge(shardId, 1, Integer::sum); + } + + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + reset(acl, node); + }); + + assertEquals(shardIdentifiers.length, aclDistributionMap.size()); + aclDistributionMap.forEach((k, v) -> assertEquals(howManyDocumentsPerShard, v.intValue())); + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + nodeDistributionMap.forEach((k, v) -> assertEquals(howManyDocumentsPerShard, v.intValue())); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DBIDRangeRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DBIDRangeRouterTest.java new file mode 100644 index 000000000..a293df87b --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DBIDRangeRouterTest.java @@ -0,0 +1,88 @@ +/* + * Copyright (C)
2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.solr.tracker; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Properties; +import java.util.Random; + +import static java.util.stream.IntStream.range; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +/** + * {@link DBIDRangeRouter} test case. 
+ * + * @author agazzarini + */ +@RunWith(MockitoJUnitRunner.class) +public class DBIDRangeRouterTest +{ + private final Random randomizer = new Random(); + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + Properties properties = new Properties(); + properties.put("shard.range", "200-20000"); + router = DocRouterFactory.getRouter(properties, ShardMethodEnum.DB_ID_RANGE); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> assertTrue(router.routeAcl(randomizer.nextInt(), randomizer.nextInt(), acl))); + } + + @Test + public void outOfBoundsShouldRejectTheNode() + { + when(node.getId()).thenReturn(199L); + assertFalse(router.routeNode(randomizer.nextInt(), randomizer.nextInt(), node)); + + when(node.getId()).thenReturn(20000L); + assertFalse(router.routeNode(randomizer.nextInt(), randomizer.nextInt(), node)); + } + + @Test + public void inRange_shouldAcceptTheNode() + { + when(node.getId()).thenReturn(200L); + assertTrue(router.routeNode(randomizer.nextInt(), randomizer.nextInt(), node)); + + when(node.getId()).thenReturn(543L); + assertTrue(router.routeNode(randomizer.nextInt(), randomizer.nextInt(), node)); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DBIDRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DBIDRouterTest.java new file mode 100644 index 000000000..cb7e4837b --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DBIDRouterTest.java @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. 
+ * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.solr.tracker; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.when; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +@RunWith(MockitoJUnitRunner.class) +public class DBIDRouterTest +{ + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.DB_ID); + } + + @Test + public void negativeShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int negativeShardCount = -14; + + 
assertTrue(router.routeAcl(negativeShardCount, 1, acl)); + assertTrue(router.routeNode(negativeShardCount, 1, node)); + } + + @Test + public void zeroShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int zeroShardCount = 0; + + assertTrue(router.routeAcl(zeroShardCount, 1, acl)); + assertTrue(router.routeNode(zeroShardCount, 1, node)); + } + + @Test + public void oneShardInTheCluster_shouldAlwaysReturnTrue() + { + // A single-shard cluster must accept every ACL and node + int oneShardCount = 1; + + assertTrue(router.routeAcl(oneShardCount, 1, acl)); + assertTrue(router.routeNode(oneShardCount, 1, node)); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> assertTrue(router.routeAcl(randomShardCountGreaterThanOne(), randomPositiveInteger(), acl))); + } + + @Test + public void multipleShardsInTheCluster_shouldBalanceNodes() + { + int [] shardIdentifiers = range(0,15).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 10000; + + Map nodeDistributionMap = new HashMap<>(); + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> { + Node node = new Node(); + node.setId(id); + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + StandardDeviation sd = new StandardDeviation(); + double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + double norm = deviation/(howManyDocumentsPerShard) * 100; + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + + // Asserts the standard deviation of the distribution map is in percentage lesser than 30% + assertTrue( + nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%", + norm < 30); + } +} \ No newline at end of file diff --git
a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DateMonthRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DateMonthRouterTest.java new file mode 100644 index 000000000..2432966d5 --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DateMonthRouterTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ */ +package org.alfresco.solr.tracker; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.alfresco.util.ISO8601DateFormat; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +@RunWith(MockitoJUnitRunner.class) +public class DateMonthRouterTest +{ + private Random randomizer = new Random(); + + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.DATE); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> assertTrue(router.routeAcl(randomizer.nextInt(), randomizer.nextInt(), acl))); + } + + @Test + public void invalidDate_shouldFallBackToDBIDRouting() + { + DBIDRouter fallbackRouting = mock(DBIDRouter.class); + ((DateMonthRouter)router).dbidRouter = fallbackRouting; + + when(node.getShardPropertyValue()).thenReturn("Something which is not an ISO Date"); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + router.routeNode(shardCount, shardInstance, node); + + 
verify(fallbackRouting).routeNode(shardCount, shardInstance, node); + } + + @Test + public void nullDate_shouldFallBackToDBIDRouting() + { + DBIDRouter fallbackRouting = mock(DBIDRouter.class); + ((DateMonthRouter)router).dbidRouter = fallbackRouting; + + when(node.getShardPropertyValue()).thenReturn(null); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + router.routeNode(shardCount, shardInstance, node); + + verify(fallbackRouting).routeNode(shardCount, shardInstance, node); + } + + @Test + public void twelveShardsInTheCluster_shouldBalanceNodes() + { + int [] shardIdentifiers = range(0,12).toArray(); + int shardCount = shardIdentifiers.length; + + router.routeNode(shardCount, 0, node); + int howManyDocuments = shardCount * 10000; + + Map nodeDistributionMap = new HashMap<>(); + + range(0, howManyDocuments) + .mapToLong(Long::valueOf) + .forEach(id -> { + String date = ISO8601DateFormat.format(new Date(System.currentTimeMillis() + id * (1000L * 60 * 60 * 24 * 30))); + Node node = new Node(); + node.setShardPropertyValue(date); + + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + StandardDeviation sd = new StandardDeviation(); + double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + + // Asserts the standard deviation of the distribution map is in percentage lesser than 30% + assertTrue( + nodeDistributionMap.values().toString() + ", SD = " + deviation, + deviation/(howManyDocuments/shardCount) * 100 < 30); + + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DistributedLastRegisteredShardRouterTest.java 
b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DistributedExplicitShardIdWithStaticPropertyRouterTest.java similarity index 78% rename from search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DistributedLastRegisteredShardRouterTest.java rename to search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DistributedExplicitShardIdWithStaticPropertyRouterTest.java index 48692021a..115d81029 100644 --- a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DistributedLastRegisteredShardRouterTest.java +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/DistributedExplicitShardIdWithStaticPropertyRouterTest.java @@ -21,7 +21,6 @@ package org.alfresco.solr.tracker; import java.util.Properties; import org.alfresco.model.ContentModel; import org.alfresco.repo.index.shard.ShardMethodEnum; -import org.alfresco.repo.search.adaptor.lucene.QueryConstants; import org.alfresco.solr.AbstractAlfrescoDistributedTest; import org.alfresco.solr.client.Acl; import org.alfresco.solr.client.AclChangeSet; @@ -30,12 +29,6 @@ import org.alfresco.solr.client.Node; import org.alfresco.solr.client.NodeMetaData; import org.alfresco.solr.client.StringPropertyValue; import org.alfresco.solr.client.Transaction; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.LegacyNumericRangeQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.SolrTestCaseJ4; import org.junit.AfterClass; @@ -60,7 +53,7 @@ import static org.carrot2.shaded.guava.common.collect.ImmutableList.of; @SolrTestCaseJ4.SuppressSSL @SolrTestCaseJ4.SuppressObjectReleaseTracker (bugUrl = "RAMDirectory") @LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", 
"Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"}) -public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDistributedTest +public class DistributedExplicitShardIdWithStaticPropertyRouterTest extends AbstractAlfrescoDistributedTest { private static long MAX_WAIT_TIME = 80000; private final int timeout = 100000; @@ -68,7 +61,7 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi @Before private void initData() throws Throwable { - initSolrServers(2, "DistributedLastRegisteredShardRoutingTest", getProperties()); + initSolrServers(2, getClass().getSimpleName(), getProperties()); indexData(); } @@ -78,32 +71,6 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi dismissSolrServers(); } - - /** - * Setup, indexes and returns the ACL used within the tests. - * - * @return the ACL used within the test. - */ - private Acl getTestAcl() throws Exception - { - AclChangeSet aclChangeSet = getAclChangeSet(1); - Acl acl = getAcl(aclChangeSet); - AclReaders aclReaders = getAclReaders(aclChangeSet, acl, singletonList("joel"), singletonList("phil"), null); - - indexAclChangeSet(aclChangeSet, singletonList(acl), singletonList(aclReaders)); - - //Check for the ACL state stamp. - BooleanQuery.Builder builder = - new BooleanQuery.Builder() - .add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_SOLR4_ID, "TRACKER!STATE!ACLTX")), BooleanClause.Occur.MUST)) - .add(new BooleanClause(LegacyNumericRangeQuery.newLongRange( - QueryConstants.FIELD_S_ACLTXID, aclChangeSet.getId(), aclChangeSet.getId() + 1, true, false), BooleanClause.Occur.MUST)); - - Query waitForQuery = builder.build(); - waitForDocCount(waitForQuery, 1, MAX_WAIT_TIME); - return acl; - } - /** * Default data is indexed in solr. * 1 folder node with 2 children nodes. 
@@ -157,7 +124,6 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi Node node4 = getNode(3, txn, acl, Node.SolrApiNodeStatus.UPDATED); node4.setExplicitShardId(1); NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "elia", null, false); - nodeMetaData4.getProperties().put(ContentModel.PROP_NAME, new StringPropertyValue("second")); nodeMetaData4.getProperties().put(ContentModel.PROP_NAME, new StringPropertyValue("forth")); /* @@ -199,4 +165,4 @@ public class DistributedLastRegisteredShardRouterTest extends AbstractAlfrescoDi prop.put("shard.method", ShardMethodEnum.LAST_REGISTERED_INDEXING_SHARD.toString()); return prop; } -} +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitIDRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitIDRouterTest.java new file mode 100644 index 000000000..dd9a04333 --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitIDRouterTest.java @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ */ +package org.alfresco.solr.tracker; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +@RunWith(MockitoJUnitRunner.class) +public class ExplicitIDRouterTest +{ + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.EXPLICIT_ID_FALLBACK_LRIS); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> + assertTrue(router.routeAcl(randomPositiveInteger(), randomPositiveInteger(), acl))); + } + + @Test + public void shardPropertyEmpty_shouldFallBackToLRIS() + { + Node node = new Node(); + node.setShardPropertyValue("\n\n\n\n \t\t"); + node.setExplicitShardId(4); + + assertDataIsDistributedAccordingWithLRISRouting(node); + } + + @Test + public void shardPropertyNull_shouldFallBackToLRIS() + { + Node node = new Node(); + node.setShardPropertyValue(null); + node.setExplicitShardId(4); + + assertDataIsDistributedAccordingWithLRISRouting(node); + } + + @Test + public void shardPropertyNaN_shouldFallBackToDBID() + { + Node node = new Node(); + node.setShardPropertyValue("This is not a valid Number that can be used as shard ID."); + 
node.setExplicitShardId(4); + + assertDataIsDistributedAccordingWithLRISRouting(node); + } + + @Test + public void dynamicAndStaticShardPropertyIsNotValid_shouldRejectTheNode() + { + Node node = new Node(); + node.setShardPropertyValue("This is not a valid Number that can be used as shard ID."); + node.setExplicitShardId(null); + + assertFalse(router.routeNode(randomShardCountGreaterThanOne(), randomPositiveInteger(), node)); + } + + @Test + public void explicitShardMatchesShardInstance() + { + int shardCount = 2; + int firstShardInstance = 0; + int secondShardInstance = 1; + + Node prototypeNodeOnFirstShard = new Node(); + prototypeNodeOnFirstShard.setShardPropertyValue(String.valueOf(firstShardInstance)); + + Node prototypeNodeOnSecondShard = new Node(); + prototypeNodeOnSecondShard.setShardPropertyValue(String.valueOf(secondShardInstance)); + + int howManyDocumentsPerShard = 1000; + Map nodeDistributionMap = new HashMap<>(); + + range(0,2).forEach(shardId -> + range(0, howManyDocumentsPerShard) + .forEach(index -> { + if (router.routeNode(shardCount, shardId, prototypeNodeOnFirstShard)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + + if (router.routeNode(shardCount, shardId, prototypeNodeOnSecondShard)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + })); + + assertEquals(shardCount, nodeDistributionMap.size()); + assertEquals(howManyDocumentsPerShard, nodeDistributionMap.get(firstShardInstance).intValue()); + assertEquals(howManyDocumentsPerShard, nodeDistributionMap.get(secondShardInstance).intValue()); + } + + @Test + public void explicitShardDoesntMatchShardInstance() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(String.valueOf(shardInstance)); + + assertFalse(router.routeNode(shardCount, shardInstance + 1, node)); + } + + private void assertDataIsDistributedAccordingWithLRISRouting(Node node) + { + int [] 
shardIdentifiers = range(0, 15).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 10000; + + Map nodeDistributionMap = new HashMap<>(); + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + })); + + assertEquals(1, nodeDistributionMap.size()); + + // Asserts that all documents have been assigned to the explicit shard ID. + assertEquals(shardCount * howManyDocumentsPerShard, nodeDistributionMap.get(node.getExplicitShardId()).intValue()); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitIDWithLRISRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitIDWithLRISRouterTest.java new file mode 100644 index 000000000..2cbdd138c --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitIDWithLRISRouterTest.java @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ */ +package org.alfresco.solr.tracker; + +import org.alfresco.repo.index.shard.ShardMethodEnum; +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class ExplicitIDWithLRISRouterTest +{ + private DocRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = DocRouterFactory.getRouter(new Properties(), ShardMethodEnum.EXPLICIT_ID); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> + assertTrue(router.routeAcl(randomPositiveInteger(), randomPositiveInteger(), acl))); + } + + @Test + public void shardPropertyEmpty_shouldFallBackToDBID() + { + // Don't set the DBID on the node, as the test is doing that in a loop. + // That allows to use this Node instance as a prototype + Node node = new Node(); + node.setShardPropertyValue("\n\n\n\n \t\t"); + + assertDataIsDistributedAccordingWithDBIDRouting(node); + } + + @Test + public void shardPropertyNull_shouldFallBackToDBID() + { + // Don't set the DBID on the node, as the test is doing that in a loop. 
+ // That allows to use this Node instance as a prototype + Node node = new Node(); + node.setShardPropertyValue(null); + + assertDataIsDistributedAccordingWithDBIDRouting(node); + } + + @Test + public void shardPropertyNaN_shouldFallBackToDBID() + { + // Don't set the DBID on the node, as the test is doing that in a loop. + // That allows to use this Node instance as a prototype + Node node = new Node(); + node.setShardPropertyValue("This is not a valid Number that can be used as shard ID."); + + assertDataIsDistributedAccordingWithDBIDRouting(node); + } + + @Test + public void explicitShardMatchesShardInstance() + { + int shardCount = 2; + int firstShardInstance = 0; + int secondShardInstance = 1; + + Node prototypeNodeOnFirstShard = new Node(); + prototypeNodeOnFirstShard.setShardPropertyValue(String.valueOf(firstShardInstance)); + + Node prototypeNodeOnSecondShard = new Node(); + prototypeNodeOnSecondShard.setShardPropertyValue(String.valueOf(secondShardInstance)); + + int howManyDocumentsPerShard = 1000; + Map nodeDistributionMap = new HashMap<>(); + + range(0,2).forEach(shardId -> { + range(0, howManyDocumentsPerShard) + .forEach(index -> { + if (router.routeNode(shardCount, shardId, prototypeNodeOnFirstShard)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + + if (router.routeNode(shardCount, shardId, prototypeNodeOnSecondShard)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + assertEquals(shardCount, nodeDistributionMap.size()); + assertEquals(howManyDocumentsPerShard, nodeDistributionMap.get(firstShardInstance).intValue()); + assertEquals(howManyDocumentsPerShard, nodeDistributionMap.get(secondShardInstance).intValue()); + } + + @Test + public void explicitShardDoesntMatchShardInstance() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(String.valueOf(shardInstance)); + + 
assertFalse(router.routeNode(shardCount, shardInstance + 1, node)); + } + + private void assertDataIsDistributedAccordingWithDBIDRouting(Node node) + { + int [] shardIdentifiers = range(0, 15).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 10000; + + Map nodeDistributionMap = new HashMap<>(); + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> { + node.setId(id); + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + StandardDeviation sd = new StandardDeviation(); + double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + double norm = deviation/(howManyDocumentsPerShard) * 100; + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + + // Asserts the standard deviation of the distribution map is in percentage lesser than 30% + assertTrue( + nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%", + norm < 30); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitShardIdWithDynamicPropertyRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitShardIdWithDynamicPropertyRouterTest.java new file mode 100644 index 000000000..21f5b6a2c --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitShardIdWithDynamicPropertyRouterTest.java @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. 
+ * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.solr.tracker; + +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import static java.util.stream.IntStream.range; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class ExplicitShardIdWithDynamicPropertyRouterTest +{ + private ExplicitShardIdWithDynamicPropertyRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = new ExplicitShardIdWithDynamicPropertyRouter(); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> + assertTrue(router.routeAcl(randomPositiveInteger(), randomPositiveInteger(), acl))); + } + + @Test + public void standaloneModeShardPropertyNaN_shouldntAcceptNode() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = 
randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn("This is not a Number"); + assertFalse(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void composableModeShardPropertyNaN_shouldntAcceptNode() + { + router = new ExplicitShardIdWithDynamicPropertyRouter(false); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn("This is not a Number"); + assertNull(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void standaloneModeShardPropertyValueIsNull_shouldntAcceptTheNode() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(null); + + assertFalse(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void composableModeShardPropertyValueIsNull_shouldRejectTheRequest() + { + router = new ExplicitShardIdWithDynamicPropertyRouter(false); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(null); + + assertNull(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void standaloneModeShardPropertyValueIsEmpty_shouldntAcceptNode() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(" \t\t\t \n\n"); + assertFalse(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void composableModeShardPropertyValueIsEmpty_shouldRejectTheRequest() + { + router = new ExplicitShardIdWithDynamicPropertyRouter(false); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(" \t\t\t \n\n"); + assertNull(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + 
public void explicitShardMatchesShardInstance() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(String.valueOf(shardInstance)); + + assertTrue(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void explicitShardDoesntMatchShardInstance() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getShardPropertyValue()).thenReturn(String.valueOf(shardInstance)); + + assertFalse(router.routeNode(shardCount, shardInstance + 1, node)); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitShardIdWithStaticPropertyRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitShardIdWithStaticPropertyRouterTest.java new file mode 100644 index 000000000..794182016 --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/ExplicitShardIdWithStaticPropertyRouterTest.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ */ +package org.alfresco.solr.tracker; + +import static java.util.stream.IntStream.range; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +@RunWith(MockitoJUnitRunner.class) +public class ExplicitShardIdWithStaticPropertyRouterTest +{ + private ExplicitShardIdWithStaticPropertyRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Before + public void setUp() + { + router = new ExplicitShardIdWithStaticPropertyRouter(); + } + + @Test + public void aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> + assertTrue(router.routeAcl(randomPositiveInteger(), randomPositiveInteger(), acl))); + } + + @Test + public void standaloneModeExplicitShardIdIsNull_shouldReturnFalse() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getExplicitShardId()).thenReturn(null); + + assertFalse(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void composableModeExplicitShardIdIsNull_shouldReturnFalse() + { + router = new ExplicitShardIdWithStaticPropertyRouter(false); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getExplicitShardId()).thenReturn(null); + + assertNull(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void explicitShardMatchesShardInstance() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = 
randomPositiveInteger(); + + when(node.getExplicitShardId()).thenReturn(shardInstance); + + assertTrue(router.routeNode(shardCount, shardInstance, node)); + } + + @Test + public void explicitShardDoesntMatchShardInstance() + { + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + when(node.getExplicitShardId()).thenReturn(shardInstance); + + assertFalse(router.routeNode(shardCount, shardInstance + 1, node)); + } +} \ No newline at end of file diff --git a/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/PropertyRouterTest.java b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/PropertyRouterTest.java new file mode 100644 index 000000000..15a4f4110 --- /dev/null +++ b/search-services/alfresco-search/src/test/java/org/alfresco/solr/tracker/PropertyRouterTest.java @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2005-2014 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ */ +package org.alfresco.solr.tracker; + +import static java.util.Arrays.stream; +import static java.util.stream.IntStream.range; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.alfresco.solr.AlfrescoSolrUtils.randomPositiveInteger; +import static org.alfresco.solr.AlfrescoSolrUtils.randomShardCountGreaterThanOne; + +import org.alfresco.solr.client.Acl; +import org.alfresco.solr.client.Node; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; + +@RunWith(MockitoJUnitRunner.class) +public class PropertyRouterTest +{ + private PropertyRouter router; + + @Mock + private Acl acl; + + @Mock + private Node node; + + @Mock + private DBIDRouter fallback; + + @Before + public void setUp() + { + router = new PropertyRouter(null); + router.fallback = fallback; + } + + @Test + public void negativeShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int negativeShardCount = -14; + + assertTrue(router.routeAcl(negativeShardCount, 1, acl)); + assertTrue(router.routeNode(negativeShardCount, 1, node)); + } + + @Test + public void zeroShardCount_shouldAlwaysReturnTrue() + { + // Should never happen + int zeroShardCount = 0; + + assertTrue(router.routeAcl(zeroShardCount, 1, acl)); + assertTrue(router.routeNode(zeroShardCount, 1, node)); + } + + @Test + public void oneShardInTheCluster_shouldAlwaysReturnTrue() + { + // Should never happen + int zeroShardCount = 0; + + assertTrue(router.routeAcl(zeroShardCount, 1, acl)); + assertTrue(router.routeNode(zeroShardCount, 1, node)); + } + + @Test + public void 
aclsAreReplicatedAcrossShards() + { + range(0, 100).forEach(index -> assertTrue(router.routeAcl(randomShardCountGreaterThanOne(), randomPositiveInteger(), acl))); + } + + @Test + public void ifPropertyAndPatternAreNull_shouldFallbackToDbId() + { + assertNull(router.pattern); + when(node.getShardPropertyValue()).thenReturn(null); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + router.routeNode(shardCount, shardInstance, node); + + verify(fallback).routeNode(shardCount, shardInstance, node); + } + + @Test + public void propertyDoesntMatchPattern_shouldFallbackToDbId() + { + router = new PropertyRouter("([0-9]*)_"); + router.fallback = fallback; + + when(node.getShardPropertyValue()).thenReturn("This value doesn't contain any number"); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + router.routeNode(shardCount, shardInstance, node); + + verify(fallback).routeNode(shardCount, shardInstance, node); + } + + @Test + public void propertyMatchesPatternButDoesntProduceAnyMatching_shouldFallbackToDbId() + { + router = new PropertyRouter("(0-9)*_.*"); + router.fallback = fallback; + + when(node.getShardPropertyValue()).thenReturn("pippo_pluto"); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + router.routeNode(shardCount, shardInstance, node); + + verify(fallback).routeNode(shardCount, shardInstance, node); + } + + @Test + public void propertyIsNullAndPatternIsEmpty_shouldFallbackToDbId() + { + String [] tests = {"", " ", "\t\t\t", "\n\n"}; + stream(tests).forEach(test -> { + router = new PropertyRouter(test); + router.fallback = fallback; + + assertNull(router.pattern); + when(node.getShardPropertyValue()).thenReturn(null); + + int shardCount = randomShardCountGreaterThanOne(); + int shardInstance = randomPositiveInteger(); + + router.routeNode(shardCount, shardInstance, node); + + 
verify(fallback).routeNode(shardCount, shardInstance, node); + + reset(node, fallback); + }); + } + + @Test + public void onlyPatternIsNull_shouldBalanceNodesOnShardProperty() + { + int [] shardIdentifiers = range(0,15).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 10000; + + Map nodeDistributionMap = new HashMap<>(); + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> { + Node node = new Node(); + node.setShardPropertyValue(String.valueOf(randomPositiveInteger())); + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeNode(shardCount, shardId, node)) + { + nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + StandardDeviation sd = new StandardDeviation(); + double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + double norm = (deviation/howManyDocumentsPerShard) * 100; + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + + // Asserts the standard deviation of the distribution map is in percentage lesser than 30% + assertTrue( + nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%", + norm < 30); + + } + + @Test + public void propertyAndPatternArentNull_shouldBalanceNodesOnShardProperty() + { + int [] shardIdentifiers = range(0,15).toArray(); + int shardCount = shardIdentifiers.length; + int howManyDocumentsPerShard = 100000; + + Map nodeDistributionMap = new HashMap<>(); + + router = new PropertyRouter("([0-9]*)(_)"); + router.fallback = fallback; + + range(0, shardCount * howManyDocumentsPerShard) + .mapToLong(Long::valueOf) + .forEach(id -> { + Node node = new Node(); + node.setShardPropertyValue(randomPositiveInteger() + "_ignoreThisPart"); + stream(shardIdentifiers) + .forEach(shardId -> { + if (router.routeNode(shardCount, shardId, node)) + { + 
nodeDistributionMap.merge(shardId, 1, Integer::sum); + } + }); + }); + + StandardDeviation sd = new StandardDeviation(); + double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray()); + double norm = deviation/(howManyDocumentsPerShard) * 100; + + assertEquals(shardIdentifiers.length, nodeDistributionMap.size()); + + // Asserts the standard deviation of the distribution map is in percentage lesser than 30% + assertTrue( + nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%", + norm < 30); + } +} \ No newline at end of file