mirror of
https://github.com/Alfresco/SearchServices.git
synced 2025-09-10 14:11:25 +00:00
Merge branch 'master' of git.alfresco.com:search_discovery/insightengine into feature/SEARCH-1643
This commit is contained in:
33
pom.xml
Normal file
33
pom.xml
Normal file
@@ -0,0 +1,33 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-super-pom</artifactId>
|
||||
<version>10</version>
|
||||
</parent>
|
||||
<artifactId>alfresco-search-and-insight-parent</artifactId>
|
||||
<version>1.4.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>Alfresco Search And Insight Parent</name>
|
||||
<distributionManagement>
|
||||
<repository>
|
||||
<id>alfresco-internal</id>
|
||||
<url>https://artifacts.alfresco.com/nexus/content/repositories/enterprise-releases/</url>
|
||||
</repository>
|
||||
<snapshotRepository>
|
||||
<id>alfresco-internal-snapshots</id>
|
||||
<url>https://artifacts.alfresco.com/nexus/content/repositories/enterprise-snapshots/</url>
|
||||
</snapshotRepository>
|
||||
</distributionManagement>
|
||||
<scm>
|
||||
<connection>scm:git:https://git.alfresco.com/search_discovery/insightengine.git</connection>
|
||||
<developerConnection>scm:git:https://git.alfresco.com/search_discovery/insightengine.git</developerConnection>
|
||||
<url>https://git.alfresco.com/search_discovery/insightengine.git</url>
|
||||
<tag>HEAD</tag>
|
||||
</scm>
|
||||
<modules>
|
||||
<module>search-services</module>
|
||||
<module>insight-engine</module>
|
||||
</modules>
|
||||
</project>
|
@@ -0,0 +1,27 @@
|
||||
# 7. Event Content Tracker
|
||||
|
||||
Date: 08/05/2019
|
||||
|
||||
## Status
|
||||
|
||||
In progress
|
||||
|
||||
## Context
|
||||
|
||||
The current approach of the *Content Tracker* in *Search Services* is to query SOLR for any `dirty` documents, whose content it then fetches from Alfresco. Once the content is successfully obtained from Alfresco, it marks the document as `clean`, which eventually gets committed to the index. This approach needs to be modified because it applies pressure on Alfresco on every call to get the content. Taking an event-based approach, where the *Content Tracker* subscribes to a topic (with a policy defining the specific behaviour), will allow it to get the extracted content when it is ready.
|
||||
|
||||
Different alternatives have been evaluated at [Event Based Content Tracker Spike](https://github.com/Alfresco/SearchServices/blob/master/alfresco-search/doc/architecture/event-trackers/event-based-content-tracker-spike.md)
|
||||
|
||||
This proposal is to develop a new Content Tracker that consumes content based on events.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use Event Oriented Content Tracking based on Apache Kafka. This product supports recovering from a previous event, so catching up with the Repository and re-indexing operations are guaranteed.
|
||||
|
||||
## Consequences
|
||||
|
||||
A complete PoC will be developed, including the following use cases:
|
||||
|
||||
1. Indexing Content on events, including asynchronous Transform Service integration.
|
||||
1. Rebuilding content indexes from scratch.
|
||||
1. Recovering from a previous content indexation status.
|
@@ -0,0 +1,206 @@
|
||||
## Tracker State
|
||||
|
||||

|
||||
|
||||
### Purpose and current implementation
|
||||
The _org.alfresco.solr.TrackerState_ class represents the state of a given tracker, with several pieces of information including the state of the tracker itself and some other data the system needs to remember about the ongoing tracking process.
|
||||
The state definition within the _TrackerState_ is actually the union of all attributes needed by all the trackers during their lifecycle.
|
||||
|
||||
The following table illustrates the member instances currently defined in the _TrackerState_, together with the indication of the tracker which makes use of that specific information.
|
||||
|
||||
| Property | READ | WRITE | Notes|
|
||||
| --------------------------|:----------:|------------------------------------|:------------|
|
||||
|lastChangeSetIdOnServer|ACLTracker|ACLTracker | |
|
||||
|lastChangeSetCommitTimeOnServer|ACLTracker|ACLTracker| |
|
||||
|lastIndexedChangeSetId*|ACLTracker, MetadataTracker::getShardState|ACLTracker| |
|
||||
|lastIndexedTxCommitTime|MetadataTracker|MetadataTracker| |
|
||||
|lastIndexedTxId|MetadataTracker|MetadataTracker| |
|
||||
|lastIndexedChangeSetCommitTime*|AclTracker, MetadataTracker, AbstractTracker::continueState|AclTracker| |
|
||||
|lastTxCommitTimeOnServer|MetadataTracker|N.A.| |
|
||||
|lastTxIdOnServer|MetadataTracker|MetadataTracker| |
|
||||
|lastIndexedTxIdBeforeHoles|MetadataTracker|N.A.| |
|
||||
|lastIndexedChangeSetIdBeforeHoles|AclTracker| N.A.| |
|
||||
|running|ALL|ALL||
|
||||
|checkedFirstTransactionTime|MetadataTracker|ACLTracker, MetadataTracker| |
|
||||
|checkedFirstAclTransactionTime|ACLTracker|ACLTracker| |
|
||||
|checkedLastAclTransactionTime|ACLTracker|ACLTracker| |
|
||||
|checkedLastTransactionTime|MetadataTracker|MetadataTracker| |
|
||||
|check|AclTrackerTest|AbstractTracker, CoreAdminHandler::actionCHECK| It seems used only for ACL test purposes, but it is set on the superclass on all trackers|
|
||||
|trackerCycles|ALL |ALL | |
|
||||
|timeToStopIndexing|AclTracker, MetadataTracker|MetadataTrackerTest, SolrInformationServer::getTrackerInitialState |Since it is set in the initial tracker state, it is done for all trackers, but actually it is never set again so the MetadataTracker and AclTracker always use the initial value. |
|
||||
|lastGoodChangeSetCommitTimeInIndex|AclTracker|AclTracker, SolrInformationServer::getTrackerInitialState, SolrInformationServer::continueState|SolrInformationServer::continueState is called on every tracker. However, only the AclTracker is using this information. |
|
||||
|lastGoodTxCommitTimeInIndex|MetadataTracker|MetadataTracker, SolrInformationServer::getTrackerInitialState, SolrInformationServer::continueState| See above|
|
||||
|timeBeforeWhichThereCanBeNoHoles|N.A.| SolrInformationServer::getTrackerInitialState, SolrInformationServer::continueState | This is set for all trackers but never read.|
|
||||
|lastStartTime| ALL|ALL ||
|
||||
|
||||
_* as part of MNT-20179, which is being implemented at the time of writing, the MetadataTracker will no longer access this attribute_
|
||||
|
||||
From the table above we can see there are some intersections between attributes and trackers; specifically we have three different scenarios:
|
||||
|
||||
- n writers, 1 reader (e.g. lastGoodChangeSetCommitTimeInIndex)
|
||||
- n readers, 1 writer (e.g. lastIndexedChangeSetCommitTime, lastIndexedChangeSetId)
|
||||
- n readers, n writers (timeToStopIndexing)
|
||||
|
||||
These scenarios suggest the "unified" approach used within the _TrackerState_ could be improved. Specifically:
|
||||
|
||||
- n writers, 1 reader: dangerous, we need to clarify if the usage of the same property is disjoint or the two writers are supposed to insist on the same property instance/value.
|
||||
- n readers, 1 writer: this could be ok, because only one tracker is mutating the property value.
|
||||
- n readers, n writers: this is the worst scenario; however, it seems only one property (timeToStopIndexing) falls here so the context should be easy to understand and address.
|
||||
|
||||
### Thoughts about the current implementation
|
||||
The current implementation of the _TrackerState_ contains a "mix" of information that can be roughly classified in two categories:
|
||||
|
||||
- things that belong to the "stateful" context of the tracker lifecycle (e.g. running, check, trackerCycles): as a side note, in a refactoring context we need to consider that each tracker instance also has some attributes which belong to this category but are not part of the _TrackerState_ (e.g. rollback)
|
||||
- things we want to remember across the tracker cycles (e.g. lastIndexedTxId, checkedFirstTransactionTime)
|
||||
|
||||
The first category is tied to a tracker instance and therefore can be considered part, strictly speaking, of the tracker state.
|
||||
The second category, instead, seems to be something shared across the trackers; that is, a place where each tracker (actually only the _MetadataTracker_ and the _AclTracker_ play a role in this context) can record some information during a tracking cycle.
|
||||
|
||||
The _TrackerState_ creation and initialisation is done in _SolrInformationServer_ (a singleton within each _SolrCore_); at the end of the subsystem initialisation, each tracker has its own _TrackerState_ instance. Looking at the two points above,
|
||||
that dedicated instance sounds reasonable for the first category (the stateful context of a tracker) while it doesn't make so much sense for the second one. This is because each tracker owns a private, decoupled instance of the tracker state but:
|
||||
|
||||
- in the best case it will use only a disjoint and partial portion of that class
|
||||
- in the worst case it will never use that
|
||||
|
||||
As a consequence of that, a proper design should divide those two responsibilities, by providing a private copy of the state and a shared instance composed of the things we need to remember across tracker cycles.
|
||||
This approach is the main driver of the proposal described in the next section.
|
||||
|
||||
### Proposal
|
||||
As explained above, the proposal would divide the current tracker state into two separate sets of attributes.
|
||||
|
||||
#### Tracker State
|
||||
This set includes things that are strictly related with the current state of a tracker (e.g. running, isInRollback, isInMaintenance). These properties are specific for each tracker, they could be expressed in two ways:
|
||||
|
||||
- as instance properties of each tracker (in _AbstractTracker_ or in the concrete tracker class)
|
||||
- using a State Pattern [1], in order to formalize the behaviour that a given tracker should have when it is in a specific state.
|
||||
|
||||
Using the first approach, variables like "running", "check", "trackerCycles" should be moved and managed directly or indirectly (_AbstractTracker_) in the tracker class. Each tracker instance must own a private copy of that variable because it can differ between trackers (e.g. _MetadataTracker_ could be running while _AclTracker_ couldn't)
|
||||
|
||||
```Java
|
||||
|
||||
public abstract class AbstractTracker {
|
||||
|
||||
private boolean running;
|
||||
|
||||
...
|
||||
|
||||
|
||||
public boolean isRunning() {
|
||||
return running;
|
||||
}
|
||||
|
||||
public void doTrack() {
|
||||
|
||||
if (isRunning()) {
|
||||
// Do something
|
||||
} else {
|
||||
// Do something else
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
The second approach would transform each tracker in a Finite State Machine (FSM) [2]. Since at the moment the different behaviour is spread across the code with a relevant amount of conditional statements, this approach requires more analysis and implementation effort.
|
||||
The concrete Tracker (or the _AbstractTracker_) would act as a controller which would orchestrate and move between available states, in order to execute the tracking logic.
|
||||
|
||||
#### Tracker Memory/Memento
|
||||
This set of attributes includes all things we need to remember across tracker cycles. The term "Memento" is taken from the popular GoF Design Pattern [3]
|
||||
but in this context the usage is slightly different because here we aren't interested in retaining a captured state for doing things like undo/redo operations: the purpose is to persist a set of information between one tracking cycle and another.
|
||||
|
||||
Under this perspective, the Memento/Memory should act as a singleton across all trackers: that is, there should be only one instance for each _SolrCore_. This class will contain all information copied from the _TrackerState_ related with ACLs and Transactions:
|
||||
|
||||
```
|
||||
public class TrackersMemento {
|
||||
|
||||
// ACLs and Transactions are two private inner classes used just for classifying things within
|
||||
// this class. They are not exposed and the caller sees only the TrackersMemento public interface (getters and
|
||||
// setters)
|
||||
|
||||
private class ACLs {
|
||||
private long lastChangeSetIdOnServer;
|
||||
private long lastChangeSetCommitTimeOnServer;
|
||||
private long lastIndexedChangeSetId;
|
||||
private long lastIndexedChangeSetCommitTime;
|
||||
private long lastIndexedChangeSetIdBeforeHoles = -1;
|
||||
|
||||
private volatile boolean checkedFirstAclTransactionTime;
|
||||
private volatile boolean checkedLastAclTransactionTime;
|
||||
|
||||
private long lastGoodChangeSetCommitTimeInIndex;
|
||||
}
|
||||
|
||||
private class Transactions {
|
||||
private long lastIndexedTxCommitTime;
|
||||
private long lastIndexedTxId;
|
||||
private long lastTxCommitTimeOnServer;
|
||||
private long lastTxIdOnServer;
|
||||
private long lastIndexedTxIdBeforeHoles = -1;
|
||||
|
||||
private boolean checkedFirstTransactionTime;
|
||||
|
||||
private boolean checkedLastTransactionTime;
|
||||
|
||||
private long lastGoodTxCommitTimeInIndex;
|
||||
}
|
||||
|
||||
private final ACLs acls = new ACLs();
|
||||
private final Transactions transactions = new Transactions();
|
||||
|
||||
// getters and setters at hosting instance level
}
|
||||
|
||||
```
|
||||
|
||||
With this class, the tracker subsystem will expose a centralized view of things that need to be recorded. This is the first important difference:
|
||||
in the current implementation each tracker has a copy of (for example) "lastIndexedTxId" regardless if it uses it or not.
|
||||
|
||||
Once we have the centralization, a proper design would enforce/formalize the read/write access to those properties. The purpose is to communicate in a formal way
|
||||
|
||||
- who is allowed to write what
|
||||
- who is allowed to read what
|
||||
|
||||
So, looking at the table at the beginning of this document, after removing things belonging to the tracker state (section above),
|
||||
we can group the remaining properties using two categories (ACLs and Transactions) which can be further split from the access perspective (Read and/or Write).
|
||||
|
||||
So the idea is to create four interfaces, all implemented by the _TrackersMemento_ which would enable
|
||||
|
||||
- ACL properties read access (e.g. _ReadableOnlyAclsMemento_) which will contain all getters related to the _TrackersMemento::ACLs_ properties
|
||||
- ACL properties write (and read) access (e.g. _AclsMemento_ extends _ReadableOnlyAclsMemento_) which will contain all setters related to the _TrackersMemento::ACLs_ properties
|
||||
- Transaction properties read access (e.g. _ReadableOnlyTransactionsMemento_) which will contain all getters related to the _TrackersMemento::Transactions_ properties
|
||||
- Transaction properties write (and read) access (e.g. _TransactionsMemento_ extends _ReadableOnlyTransactionsMemento_) which will contain all setters related to the _TrackersMemento::Transactions_ properties
|
||||
|
||||
The _TrackersMemento_ would implement all four interfaces:
|
||||
|
||||
```
|
||||
|
||||
// ReadableOnly interfaces are automatically inherited
|
||||
|
||||
public class TrackersMemento implements AclsMemento, TransactionsMemento {
|
||||
```
|
||||
|
||||

|
||||
|
||||
Using the class hierarchy above, we could improve the formalisation of the different kind of access each tracker would require. Specifically:
|
||||
|
||||
- the memento instance could be available or not in each tracker. That probably means it doesn't make sense to have it available at the _AbstractTracker_ level.
|
||||
- a tracker or a component, depending on its requirements and behaviour, could have a reference to a readable and/or writable (ACLs and/or Transactions) memento.
|
||||
|
||||
For example:
|
||||
|
||||
- _ModelTracker_ won't have any reference because it doesn't need it
|
||||
- _ACLTracker_ would see an _AclsMemento_ with full RW access but a Read-Only view of the _TransactionsMemento_
|
||||
- _CoreAdminHandler_ would have a Read-Only view of both ACLs and Transactions memento
|
||||
- _SolrInformationServer_ would have full access to the _TrackersMemento_, because it creates and initialises it
|
||||
|
||||
### Single or Multi Core
|
||||
Being a Singleton, the _TrackersMemento_ could also be designed for storing/serving data belonging to more than one core.
|
||||
This could be useful if the same Solr instance hosts multiple cores (e.g. alfresco, archive). In that case the access interfaces and the access pattern would be slightly different
|
||||
because they should be implemented to also take into account the core name as an input parameter.
|
||||
|
||||
The resulting implementation should provide a 1-n cardinality relationship (by means of a dictionary, for example) between the memento instance and the internal stored sets of attributes belonging to different cores.
|
||||
|
||||
***
|
||||
[1] [State Pattern](https://en.wikipedia.org/wiki/State_pattern)
|
||||
[2] [Finite State Machine](https://en.wikipedia.org/wiki/Finite-state_machine)
|
||||
[3] [Memento](https://en.wikipedia.org/wiki/Memento_pattern)
|
Binary file not shown.
After Width: | Height: | Size: 304 KiB |
@@ -84,6 +84,12 @@
|
||||
<version>1.7.26</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>jaxen</groupId>
|
||||
<artifactId>jaxen</artifactId>
|
||||
<version>1.1.6</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
@@ -108,7 +114,7 @@
|
||||
<dependency>
|
||||
<groupId>com.carrotsearch.randomizedtesting</groupId>
|
||||
<artifactId>randomizedtesting-runner</artifactId>
|
||||
<version>2.7.2</version>
|
||||
<version>2.7.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
@@ -24,7 +24,6 @@ import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
@@ -39,6 +38,7 @@ import org.alfresco.repo.search.impl.QueryParserUtils;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.AlfrescoCoreAdminHandler;
|
||||
import org.alfresco.solr.AlfrescoSolrDataModel;
|
||||
import org.alfresco.solr.BoundedDeque;
|
||||
import org.alfresco.solr.InformationServer;
|
||||
@@ -56,6 +56,8 @@ import org.json.JSONException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static java.util.Optional.of;
|
||||
|
||||
/*
|
||||
* This tracks two things: transactions and metadata nodes
|
||||
* @author Ahmed Owian
|
||||
@@ -194,16 +196,36 @@ public class MetadataTracker extends AbstractTracker implements Tracker
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
* The {@link ShardState}, as the name suggests, encapsulates/stores the state of the shard which hosts this
|
||||
* {@link MetadataTracker} instance.
|
||||
*
|
||||
* The {@link ShardState} is primarily used in two places:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Transaction tracking: (see {@link #trackTransactions()}): for pulling/tracking transactions from Alfresco</li>
|
||||
* <li>
|
||||
* DynamicSharding: when the {@link MetadataTracker} is running on a slave instance it doesn't actually act
|
||||
* as a tracker, it calls Alfresco to register the state of the node (the shard) without pulling any transactions.
|
||||
* As consequence of that, Alfresco will be aware about the shard which will be included in subsequent queries.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* @return the {@link ShardState} instance which stores the current state of the hosting shard.
|
||||
*/
|
||||
private ShardState getShardState()
|
||||
ShardState getShardState()
|
||||
{
|
||||
TrackerState state = super.getTrackerState();
|
||||
TrackerState transactionsTrackerState = super.getTrackerState();
|
||||
TrackerState changeSetsTrackerState =
|
||||
of(infoSrv.getAdminHandler())
|
||||
.map(AlfrescoCoreAdminHandler::getTrackerRegistry)
|
||||
.map(registry -> registry.getTrackerForCore(coreName, AclTracker.class))
|
||||
.map(Tracker::getTrackerState)
|
||||
.orElse(transactionsTrackerState);
|
||||
|
||||
HashMap<String, String> propertyBag = new HashMap<>();
|
||||
propertyBag.put("coreName", coreName);
|
||||
|
||||
ShardState shardstate = ShardStateBuilder.shardState()
|
||||
|
||||
return ShardStateBuilder.shardState()
|
||||
.withMaster(isMaster)
|
||||
.withLastUpdated(System.currentTimeMillis())
|
||||
.withLastIndexedChangeSetCommitTime(state.getLastIndexedChangeSetCommitTime())
|
||||
@@ -227,7 +249,6 @@ public class MetadataTracker extends AbstractTracker implements Tracker
|
||||
.endShard()
|
||||
.endShardInstance()
|
||||
.build();
|
||||
return shardstate;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -343,7 +364,6 @@ public class MetadataTracker extends AbstractTracker implements Tracker
|
||||
gnp.setStoreProtocol(storeRef.getProtocol());
|
||||
gnp.setStoreIdentifier(storeRef.getIdentifier());
|
||||
gnp.setShardProperty(shardProperty);
|
||||
gnp.setCoreName(coreName);
|
||||
|
||||
List<Node> nodes = client.getNodes(gnp, (int) info.getUpdates());
|
||||
for (Node node : nodes)
|
||||
@@ -862,7 +882,6 @@ public class MetadataTracker extends AbstractTracker implements Tracker
|
||||
gnp.setStoreProtocol(storeRef.getProtocol());
|
||||
gnp.setStoreIdentifier(storeRef.getIdentifier());
|
||||
gnp.setShardProperty(shardProperty);
|
||||
gnp.setCoreName(coreName);
|
||||
List<Node> nodes = client.getNodes(gnp, Integer.MAX_VALUE);
|
||||
|
||||
ArrayList<Node> nodeBatch = new ArrayList<>();
|
||||
|
@@ -1,89 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2017 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.dataload;
|
||||
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Random;
|
||||
|
||||
public class Loader {
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
int num = Integer.parseInt(args[0]);
|
||||
int start = Integer.parseInt(args[1]);
|
||||
//String url = "http://localhost:8985/solr/joel";
|
||||
String url = "http://localhost:8983/solr/collection1";
|
||||
|
||||
HttpSolrClient client = new HttpSolrClient(url);
|
||||
UpdateRequest request = new UpdateRequest();
|
||||
int i = start;
|
||||
LocalDateTime localDate = LocalDateTime.now();
|
||||
|
||||
Random rand = new Random();
|
||||
for(i=start; i<num+start; i++) {
|
||||
String s = rand.nextInt(1000)+"helloworld123";
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", i);
|
||||
doc.addField("test_s", s);
|
||||
doc.addField("test_t", "hello world we love you");
|
||||
int year = rand.nextInt(50);
|
||||
int month = rand.nextInt(12);
|
||||
int day = rand.nextInt(30);
|
||||
float f = rand.nextFloat();
|
||||
|
||||
doc.addField("year_i", Integer.toString(year));
|
||||
doc.addField("month_i", Integer.toString(month));
|
||||
doc.addField("day_i", Integer.toString(day));
|
||||
doc.addField("price_f", Float.toString(f));
|
||||
|
||||
LocalDateTime randomDate = localDate.plusDays(rand.nextInt(1000));
|
||||
doc.addField("date_dt", DateTimeFormatter.ISO_INSTANT.format(randomDate.toInstant(ZoneOffset.UTC)));
|
||||
doc.addField("epoch_l", randomDate.atZone(ZoneOffset.UTC).toInstant().toEpochMilli());
|
||||
|
||||
request.add(doc);
|
||||
if(i % 5000 == 0) {
|
||||
request.process(client);
|
||||
client.commit();
|
||||
request = new UpdateRequest();
|
||||
}
|
||||
|
||||
|
||||
for(int l=0; l<5; l++) {
|
||||
String ps = "product"+rand.nextInt(35);
|
||||
doc.addField("prod_ss",ps);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if((i % 5000) != 0) {
|
||||
request.process(client);
|
||||
client.commit();
|
||||
}
|
||||
|
||||
client.close();
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -18,6 +18,8 @@
|
||||
*/
|
||||
package org.alfresco.solr;
|
||||
|
||||
import static java.util.Optional.of;
|
||||
import static java.util.Optional.ofNullable;
|
||||
import static junit.framework.TestCase.assertNotNull;
|
||||
import static junit.framework.TestCase.assertTrue;
|
||||
import static org.alfresco.repo.search.adaptor.lucene.QueryConstants.FIELD_ACLID;
|
||||
@@ -68,6 +70,7 @@ import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.index.shard.ShardState;
|
||||
import org.alfresco.repo.tenant.TenantService;
|
||||
import org.alfresco.service.cmr.repository.ChildAssociationRef;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
@@ -86,11 +89,11 @@ import org.alfresco.solr.client.StringPropertyValue;
|
||||
import org.alfresco.solr.client.Transaction;
|
||||
import org.alfresco.util.ISO9075;
|
||||
import org.alfresco.util.Pair;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.SolrTestCaseJ4.XmlDoc;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.CoreAdminParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.XML;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
@@ -108,6 +111,8 @@ import org.apache.solr.update.CommitUpdateCommand;
|
||||
public class AlfrescoSolrUtils
|
||||
{
|
||||
public static final String TEST_NAMESPACE = "http://www.alfresco.org/test/solrtest";
|
||||
public static long MAX_WAIT_TIME = 80000;
|
||||
|
||||
/**
|
||||
* Get transaction.
|
||||
* @param deletes
|
||||
@@ -884,5 +889,40 @@ public class AlfrescoSolrUtils
|
||||
assertTrue("There must be a searcher for "+coreName, ((Integer)coreSummary.get("Number of Searchers")) > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts that the input {@link ShardState} and the CoreAdmin.SUMMARY response give the same information.
|
||||
*
|
||||
* @param state the {@link ShardState} instance.
|
||||
* @param core the target {@link SolrCore} instance.
|
||||
*/
|
||||
public static void assertShardAndCoreSummaryConsistency(ShardState state, SolrCore core) {
|
||||
SolrParams params =
|
||||
new ModifiableSolrParams()
|
||||
.add(CoreAdminParams.CORE, core.getName())
|
||||
.add(CoreAdminParams.ACTION, "SUMMARY");
|
||||
|
||||
}
|
||||
SolrQueryRequest request = new LocalSolrQueryRequest(core, params);
|
||||
SolrQueryResponse response = new SolrQueryResponse();
|
||||
coreAdminHandler(core).handleRequest(request, response);
|
||||
|
||||
NamedList<?> summary =
|
||||
ofNullable(response.getValues())
|
||||
.map(values -> values.get("Summary"))
|
||||
.map(NamedList.class::cast)
|
||||
.map(values -> values.get(core.getName()))
|
||||
.map(NamedList.class::cast)
|
||||
.orElseGet(NamedList::new);
|
||||
|
||||
assertEquals(state.getLastIndexedChangeSetId(), summary.get("Id for last Change Set in index"));
|
||||
assertEquals(state.getLastIndexedChangeSetCommitTime(), summary.get("Last Index Change Set Commit Time"));
|
||||
assertEquals(state.getLastIndexedTxCommitTime(), summary.get("Last Index TX Commit Time"));
|
||||
assertEquals(state.getLastIndexedTxId(), summary.get("Id for last TX in index"));
|
||||
}
|
||||
|
||||
public static AlfrescoCoreAdminHandler coreAdminHandler(SolrCore core) {
|
||||
return of(core).map(SolrCore::getCoreContainer)
|
||||
.map(CoreContainer::getMultiCoreHandler)
|
||||
.map(AlfrescoCoreAdminHandler.class::cast)
|
||||
.orElseThrow(() -> new IllegalStateException("Cannot retrieve the Core Admin Handler on this test core."));
|
||||
}
|
||||
}
|
@@ -16,10 +16,11 @@
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.solr.query.afts;
|
||||
package org.alfresco.solr.dataload;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
import static java.util.Collections.singletonList;
|
||||
import static java.util.stream.IntStream.range;
|
||||
import static org.alfresco.model.ContentModel.ASSOC_CONTAINS;
|
||||
import static org.alfresco.model.ContentModel.ASSOC_CHILDREN;
|
||||
import static org.alfresco.model.ContentModel.PROP_NAME;
|
||||
@@ -28,6 +29,8 @@ import static org.alfresco.service.namespace.NamespaceService.CONTENT_MODEL_1_0_
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.addNode;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.createGUID;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.addStoreRoot;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getNode;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getNodeMetaData;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.service.cmr.repository.ChildAssociationRef;
|
||||
@@ -39,19 +42,27 @@ import org.alfresco.service.cmr.repository.datatype.Duration;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.AlfrescoSolrConstants;
|
||||
import org.alfresco.solr.AlfrescoSolrDataModel;
|
||||
import org.alfresco.solr.client.Acl;
|
||||
import org.alfresco.solr.client.ContentPropertyValue;
|
||||
import org.alfresco.solr.client.MLTextPropertyValue;
|
||||
import org.alfresco.solr.client.MultiPropertyValue;
|
||||
import org.alfresco.solr.client.Node;
|
||||
import org.alfresco.solr.client.NodeMetaData;
|
||||
import org.alfresco.solr.client.PropertyValue;
|
||||
import org.alfresco.solr.client.StringPropertyValue;
|
||||
import org.alfresco.solr.client.Transaction;
|
||||
import org.alfresco.util.ISO9075;
|
||||
import org.apache.solr.client.solrj.io.Tuple;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.util.TestHarness;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Test datasets provider.
|
||||
@@ -855,6 +866,14 @@ public class TestDataProvider implements AlfrescoSolrConstants
|
||||
return testProperties;
|
||||
}
|
||||
|
||||
public static Map.Entry<List<Node>, List<NodeMetaData>> nSampleNodesWithSampleContent(Acl acl, Transaction txn, int howManyNodes) {
|
||||
|
||||
List<Node> nodes = range(0, howManyNodes).mapToObj(index -> getNode(txn, acl, Node.SolrApiNodeStatus.UPDATED)).collect(Collectors.toList());
|
||||
List<NodeMetaData> metadata = nodes.stream().map(node -> getNodeMetaData(node, txn, acl, "mike", null, false)).collect(Collectors.toList());
|
||||
|
||||
return new AbstractMap.SimpleImmutableEntry<>(nodes, metadata);
|
||||
}
|
||||
|
||||
public NodeRef newNodeRef()
|
||||
{
|
||||
return new NodeRef(new StoreRef("workspace", "SpacesStore"), createGUID());
|
@@ -19,7 +19,7 @@
|
||||
package org.alfresco.solr.query.afts.qparser;
|
||||
|
||||
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.alfresco.util.ISO9075;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@@ -30,7 +30,7 @@ import static org.alfresco.model.ContentModel.TYPE_THUMBNAIL;
|
||||
|
||||
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.alfresco.util.CachingDateFormat;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
|
@@ -22,7 +22,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.client.PropertyValue;
|
||||
import org.alfresco.solr.client.StringPropertyValue;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@@ -30,7 +30,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.client.PropertyValue;
|
||||
import org.alfresco.solr.client.StringPropertyValue;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@@ -27,7 +27,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.client.PropertyValue;
|
||||
import org.alfresco.solr.client.StringPropertyValue;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@@ -28,7 +28,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.client.PropertyValue;
|
||||
import org.alfresco.solr.client.StringPropertyValue;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.alfresco.util.Pair;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@@ -10,7 +10,7 @@ import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.solr.AlfrescoSolrDataModel;
|
||||
import org.alfresco.solr.SolrInformationServer;
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
package org.alfresco.solr.query.afts.requestHandler;
|
||||
|
||||
import org.alfresco.solr.query.afts.TestDataProvider;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2019 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package org.alfresco.solr.tracker;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
import static java.util.Collections.singletonList;
|
||||
import static java.util.Optional.of;
|
||||
import static java.util.stream.IntStream.range;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.MAX_WAIT_TIME;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.assertShardAndCoreSummaryConsistency;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.coreAdminHandler;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getAcl;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getAclChangeSet;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getAclReaders;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getTransaction;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.indexAclChangeSet;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
|
||||
import org.alfresco.repo.index.shard.ShardState;
|
||||
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||
import org.alfresco.solr.AbstractAlfrescoSolrTests;
|
||||
import org.alfresco.solr.AlfrescoCoreAdminHandler;
|
||||
import org.alfresco.solr.client.Acl;
|
||||
import org.alfresco.solr.client.AclChangeSet;
|
||||
import org.alfresco.solr.client.AclReaders;
|
||||
import org.alfresco.solr.client.Node;
|
||||
import org.alfresco.solr.client.NodeMetaData;
|
||||
import org.alfresco.solr.client.SOLRAPIQueueClient;
|
||||
import org.alfresco.solr.client.Transaction;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* A partial state of {@link org.alfresco.solr.TrackerState} is exposed through two interfaces: AdminHandler.SUMMARY and
|
||||
* {@link MetadataTracker#getShardState}.
|
||||
*
|
||||
* This test makes sure that state is consistent across the two mentioned approaches. That is, properties returned by the
|
||||
* Core SUMMARY must have the same value of the same properties in the ShardState.
|
||||
*
|
||||
* @author agazzarini
|
||||
*/
|
||||
public class AlfrescoSolrTrackerStateTest extends AbstractAlfrescoSolrTests
|
||||
{
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception
|
||||
{
|
||||
initAlfrescoCore("schema.xml");
|
||||
}
|
||||
|
||||
@After
|
||||
public void clearQueue() throws Exception {
|
||||
SOLRAPIQueueClient.nodeMetaDataMap.clear();
|
||||
SOLRAPIQueueClient.transactionQueue.clear();
|
||||
SOLRAPIQueueClient.aclChangeSetQueue.clear();
|
||||
SOLRAPIQueueClient.aclReadersMap.clear();
|
||||
SOLRAPIQueueClient.aclMap.clear();
|
||||
SOLRAPIQueueClient.nodeMap.clear();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void indexTestAclTransactionAndNodes() throws Exception
|
||||
{
|
||||
Acl acl = createAndIndexSomeAclData();
|
||||
createAndIndexTransactionWithSomeNodes(4, acl, "first");
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("deprecated")
|
||||
public void shardStateMustBeConsistentWithCoreSummaryStats() throws Exception {
|
||||
SolrCore core = getCore();
|
||||
|
||||
MetadataTracker tracker =
|
||||
of(coreAdminHandler(core))
|
||||
.map(AlfrescoCoreAdminHandler::getTrackerRegistry)
|
||||
.map(registry -> registry.getTrackerForCore(core.getName(), MetadataTracker.class))
|
||||
.orElseThrow(() -> new IllegalStateException("Cannot retrieve the Metadata tracker on this test core."));
|
||||
|
||||
// 1. First consistency check: ShardState must have the same values of CoreAdmin.SUMMARY report
|
||||
ShardState shardStateAfterFirstIndexingRound = tracker.getShardState();
|
||||
assertShardAndCoreSummaryConsistency(shardStateAfterFirstIndexingRound, core);
|
||||
|
||||
// 2. Index additional ACLs
|
||||
Acl acl = createAndIndexSomeAclData();
|
||||
|
||||
ShardState shardStateAfterIndexingSomeAdditionalAcl= tracker.getShardState();
|
||||
|
||||
// 3. We indexed only ACLs, so ACL data must be different while Transaction data must be the same
|
||||
assertEquals(shardStateAfterFirstIndexingRound.getLastIndexedTxId(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxId());
|
||||
assertEquals(shardStateAfterFirstIndexingRound.getLastIndexedTxCommitTime(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxCommitTime());
|
||||
assertNotEquals(shardStateAfterFirstIndexingRound.getLastIndexedChangeSetId(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetId());
|
||||
assertNotEquals(shardStateAfterFirstIndexingRound.getLastIndexedChangeSetCommitTime(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetCommitTime());
|
||||
|
||||
// Second consistency check: ShardState must have the same values of CoreAdmin.SUMMARY report
|
||||
assertShardAndCoreSummaryConsistency(shardStateAfterIndexingSomeAdditionalAcl, core);
|
||||
|
||||
// 4. Index a transaction with 5 other nodes
|
||||
createAndIndexTransactionWithSomeNodes(5, acl, "second");
|
||||
|
||||
ShardState shardStateAfterIndexingAnAdditionalTransaction = tracker.getShardState();
|
||||
assertNotEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxId(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedTxId());
|
||||
assertNotEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxCommitTime(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedTxCommitTime());
|
||||
assertEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetId(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedChangeSetId());
|
||||
assertEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetCommitTime(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedChangeSetCommitTime());
|
||||
|
||||
// 5. Third consistency check: ShardState must have the same values of CoreAdmin.SUMMARY report
|
||||
assertShardAndCoreSummaryConsistency(tracker.getShardState(), core);
|
||||
}
|
||||
|
||||
private void makeSureTransactionHasBeenIndexed(long transactionId) throws Exception
|
||||
{
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_SOLR4_ID, "TRACKER!STATE!TX")), BooleanClause.Occur.MUST));
|
||||
builder.add(new BooleanClause(LegacyNumericRangeQuery.newLongRange(QueryConstants.FIELD_S_TXID, transactionId, transactionId + 1, true, false), BooleanClause.Occur.MUST));
|
||||
BooleanQuery waitForQuery = builder.build();
|
||||
waitForDocCount(waitForQuery, 1, MAX_WAIT_TIME);
|
||||
}
|
||||
|
||||
private void makeSureNodesHaveBeenIndexed(int expectedCount, String searchText) throws Exception
|
||||
{
|
||||
waitForDocCount(new TermQuery(new Term("content@s___t@{http://www.alfresco.org/model/content/1.0}content", searchText)), expectedCount, MAX_WAIT_TIME);
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new BooleanClause(new TermQuery(new Term("content@s___t@{http://www.alfresco.org/model/content/1.0}content", searchText)), BooleanClause.Occur.MUST));
|
||||
builder.add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_OWNER, "mike")), BooleanClause.Occur.MUST));
|
||||
waitForDocCount(builder.build(), expectedCount, MAX_WAIT_TIME);
|
||||
}
|
||||
|
||||
private Acl createAndIndexSomeAclData() throws Exception
|
||||
{
|
||||
AclChangeSet aclChangeSet = getAclChangeSet(1);
|
||||
|
||||
Acl acl = getAcl(aclChangeSet);
|
||||
Acl acl2 = getAcl(aclChangeSet);
|
||||
|
||||
AclReaders aclReaders = getAclReaders(aclChangeSet, acl, singletonList("joel"), singletonList("phil"), null);
|
||||
AclReaders aclReaders2 = getAclReaders(aclChangeSet, acl2, singletonList("jim"), singletonList("phil"), null);
|
||||
|
||||
indexAclChangeSet(aclChangeSet, asList(acl, acl2), asList(aclReaders, aclReaders2));
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_SOLR4_ID, "TRACKER!STATE!ACLTX")), BooleanClause.Occur.MUST));
|
||||
builder.add(new BooleanClause(LegacyNumericRangeQuery.newLongRange(QueryConstants.FIELD_S_ACLTXID, aclChangeSet.getId(), aclChangeSet.getId() + 1, true, false), BooleanClause.Occur.MUST));
|
||||
BooleanQuery waitForQuery = builder.build();
|
||||
waitForDocCount(waitForQuery, 1, MAX_WAIT_TIME);
|
||||
|
||||
return acl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and indexes a transaction with a certain number of nodes.
|
||||
*
|
||||
* @param howManyTestNodes how many nodes we want to index.
|
||||
* @param acl the related ACL.
|
||||
* @param sampleTextContent a sample text content that will be used to assert nodes have been actually indexed.
|
||||
*/
|
||||
private void createAndIndexTransactionWithSomeNodes(int howManyTestNodes, Acl acl, String sampleTextContent) throws Exception
|
||||
{
|
||||
Transaction txn = getTransaction(0, howManyTestNodes);
|
||||
Map.Entry<List<Node>, List<NodeMetaData>> data = TestDataProvider.nSampleNodesWithSampleContent(acl, txn, howManyTestNodes);
|
||||
|
||||
indexTransaction(txn, data.getKey(), data.getValue(), range(0, howManyTestNodes).mapToObj(index -> sampleTextContent).collect(Collectors.toList()));
|
||||
|
||||
makeSureTransactionHasBeenIndexed(txn.getId());
|
||||
makeSureNodesHaveBeenIndexed(data.getKey().size(), sampleTextContent);
|
||||
}
|
||||
}
|
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2019 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.solr.tracker;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
import static java.util.Collections.singletonList;
|
||||
import static java.util.Optional.of;
|
||||
import static java.util.stream.IntStream.range;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.MAX_WAIT_TIME;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.assertShardAndCoreSummaryConsistency;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.coreAdminHandler;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getAcl;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getAclChangeSet;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getAclReaders;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.getTransaction;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.indexAclChangeSet;
|
||||
|
||||
import org.alfresco.repo.index.shard.ShardState;
|
||||
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||
import org.alfresco.solr.AbstractAlfrescoDistributedTest;
|
||||
import org.alfresco.solr.AlfrescoCoreAdminHandler;
|
||||
import org.alfresco.solr.client.Acl;
|
||||
import org.alfresco.solr.client.AclChangeSet;
|
||||
import org.alfresco.solr.client.AclReaders;
|
||||
import org.alfresco.solr.client.Node;
|
||||
import org.alfresco.solr.client.NodeMetaData;
|
||||
import org.alfresco.solr.client.Transaction;
|
||||
import org.alfresco.solr.dataload.TestDataProvider;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* A partial state of {@link org.alfresco.solr.TrackerState} is exposed through two interfaces: AdminHandler.SUMMARY and
|
||||
* {@link MetadataTracker#getShardState}.
|
||||
* This test makes sure that state is consistent across the two mentioned approaches. That is, properties returned by the
|
||||
* Core SUMMARY must have the same value of the same properties in the ShardState.
|
||||
*
|
||||
* Note that this is the distributed version of {@link AlfrescoSolrTrackerStateTest}.
|
||||
*
|
||||
* @author agazzarini
|
||||
*/
|
||||
@SolrTestCaseJ4.SuppressSSL
|
||||
public class DistributedAlfrescoSolrTrackerStateTest extends AbstractAlfrescoDistributedTest
|
||||
{
|
||||
@BeforeClass
|
||||
private static void initData() throws Throwable
|
||||
{
|
||||
initSolrServers(5, getClassName(),null);
|
||||
|
||||
Acl acl = createAndIndexSomeAclData();
|
||||
createAndIndexTransactionWithSomeNodes(5, acl, "first");
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
private static void destroyData()
|
||||
{
|
||||
dismissSolrServers();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shardStateMustBeConsistentWithCoreSummaryStats()
|
||||
{
|
||||
putHandleDefaults();
|
||||
|
||||
getJettyCores(solrShards).forEach(core -> {
|
||||
MetadataTracker tracker =
|
||||
of(coreAdminHandler(core))
|
||||
.map(AlfrescoCoreAdminHandler::getTrackerRegistry)
|
||||
.map(registry -> registry.getTrackerForCore(core.getName(), MetadataTracker.class))
|
||||
.orElseThrow(() -> new IllegalStateException("Cannot retrieve the Metadata tracker on this test core."));
|
||||
|
||||
// 1. First consistency check: ShardState must have the same values of CoreAdmin.SUMMARY report
|
||||
ShardState shardStateAfterFirstIndexingRound = tracker.getShardState();
|
||||
assertShardAndCoreSummaryConsistency(shardStateAfterFirstIndexingRound, core);
|
||||
|
||||
// 2. Index additional ACLs
|
||||
Acl acl = createAndIndexSomeAclData();
|
||||
|
||||
ShardState shardStateAfterIndexingSomeAdditionalAcl= tracker.getShardState();
|
||||
|
||||
// 3. We indexed only ACLs, so ACL data must be different while Transaction data must be the same
|
||||
assertEquals(shardStateAfterFirstIndexingRound.getLastIndexedTxId(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxId());
|
||||
assertEquals(shardStateAfterFirstIndexingRound.getLastIndexedTxCommitTime(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxCommitTime());
|
||||
assertNotEquals(shardStateAfterFirstIndexingRound.getLastIndexedChangeSetId(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetId());
|
||||
assertNotEquals(shardStateAfterFirstIndexingRound.getLastIndexedChangeSetCommitTime(), shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetCommitTime());
|
||||
|
||||
// Second consistency check: ShardState must have the same values of CoreAdmin.SUMMARY report
|
||||
assertShardAndCoreSummaryConsistency(shardStateAfterIndexingSomeAdditionalAcl, core);
|
||||
|
||||
// 4. Index a transaction with 10 other nodes
|
||||
createAndIndexTransactionWithSomeNodes(10, acl, "second");
|
||||
|
||||
ShardState shardStateAfterIndexingAnAdditionalTransaction = tracker.getShardState();
|
||||
assertNotEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxId(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedTxId());
|
||||
assertNotEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedTxCommitTime(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedTxCommitTime());
|
||||
assertEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetId(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedChangeSetId());
|
||||
assertEquals(shardStateAfterIndexingSomeAdditionalAcl.getLastIndexedChangeSetCommitTime(), shardStateAfterIndexingAnAdditionalTransaction.getLastIndexedChangeSetCommitTime());
|
||||
|
||||
// 5. Third consistency check: ShardState must have the same values of CoreAdmin.SUMMARY report
|
||||
assertShardAndCoreSummaryConsistency(tracker.getShardState(), core);
|
||||
});
|
||||
}
|
||||
|
||||
private static Acl createAndIndexSomeAclData()
|
||||
{
|
||||
try {
|
||||
|
||||
AclChangeSet aclChangeSet = getAclChangeSet(1);
|
||||
|
||||
Acl acl = getAcl(aclChangeSet);
|
||||
Acl acl2 = getAcl(aclChangeSet);
|
||||
|
||||
AclReaders aclReaders = getAclReaders(aclChangeSet, acl, singletonList("joel"), singletonList("phil"), null);
|
||||
AclReaders aclReaders2 = getAclReaders(aclChangeSet, acl2, singletonList("jim"), singletonList("phil"), null);
|
||||
|
||||
indexAclChangeSet(aclChangeSet, asList(acl, acl2), asList(aclReaders, aclReaders2));
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_SOLR4_ID, "TRACKER!STATE!ACLTX")), BooleanClause.Occur.MUST));
|
||||
builder.add(new BooleanClause(LegacyNumericRangeQuery.newLongRange(QueryConstants.FIELD_S_ACLTXID, aclChangeSet.getId(), aclChangeSet.getId() + 1, true, false), BooleanClause.Occur.MUST));
|
||||
BooleanQuery waitForQuery = builder.build();
|
||||
waitForDocCountAllCores(waitForQuery, 1, MAX_WAIT_TIME);
|
||||
|
||||
return acl;
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
throw new RuntimeException(exception);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and indexes a transaction with a certain number of nodes.
|
||||
*
|
||||
* @param howManyTestNodes how many nodes we want to index.
|
||||
* @param acl the related ACL.
|
||||
* @param sampleTextContent a sample text content that will be used to assert nodes have been actually indexed.
|
||||
*/
|
||||
private static void createAndIndexTransactionWithSomeNodes(int howManyTestNodes, Acl acl, String sampleTextContent)
|
||||
{
|
||||
try
|
||||
{
|
||||
Transaction txn = getTransaction(0, howManyTestNodes);
|
||||
Map.Entry<List<Node>, List<NodeMetaData>> data = TestDataProvider.nSampleNodesWithSampleContent(acl, txn, howManyTestNodes);
|
||||
|
||||
indexTransaction(txn, data.getKey(), data.getValue(), range(0, howManyTestNodes).mapToObj(index -> sampleTextContent).collect(Collectors.toList()));
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new BooleanClause(new TermQuery(new Term(QueryConstants.FIELD_SOLR4_ID, "TRACKER!STATE!TX")), BooleanClause.Occur.MUST));
|
||||
builder.add(new BooleanClause(LongPoint.newExactQuery(QueryConstants.FIELD_S_TXID, txn.getId()), BooleanClause.Occur.MUST));
|
||||
BooleanQuery waitForQuery = builder.build();
|
||||
|
||||
waitForDocCountAllCores(waitForQuery, 1, MAX_WAIT_TIME);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
throw new RuntimeException(exception);
|
||||
}
|
||||
}
|
||||
}
|
@@ -118,7 +118,9 @@ public class DistributedDateMonthAlfrescoSolrTrackerTest extends AbstractAlfresc
|
||||
Date[] dates = new Date[5];
|
||||
|
||||
Calendar cal = new GregorianCalendar();
|
||||
cal.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
for (int i = 0; i < dates.length; i++) {
|
||||
|
||||
cal.set(1980, i, 21);
|
||||
dates[i] = cal.getTime();
|
||||
}
|
||||
|
@@ -22,7 +22,7 @@
|
||||
</distributionManagement>
|
||||
|
||||
<properties>
|
||||
<dependency.alfresco-data-model.version>8.30</dependency.alfresco-data-model.version>
|
||||
<dependency.alfresco-data-model.version>8.32</dependency.alfresco-data-model.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
@@ -43,7 +43,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-failsafe-plugin</artifactId>
|
||||
<version>2.22.1</version>
|
||||
<version>2.22.2</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
|
@@ -1,6 +1,6 @@
|
||||
# Alfresco Search Services ${project.version} Docker Image
|
||||
|
||||
FROM alfresco/alfresco-base-java:11.0.1-openjdk-centos-7-1fd3c4475374
|
||||
FROM alfresco/alfresco-base-java:11.0.1-openjdk-centos-7-3e4e9f4e5d6a
|
||||
LABEL creator="Gethin James" maintainer="Alfresco Search Services Team"
|
||||
|
||||
ENV DIST_DIR /opt/alfresco-search-services
|
||||
|
@@ -35,7 +35,7 @@ json-20160212.jar http://code.google.com/p/json-simple/
|
||||
xml-resolver-1.2.jar https://github.com/FasterXML/jackson
|
||||
neethi-3.0.3.jar http://ws.apache.org/commons/neethi/
|
||||
commons-logging-1.2.jar http://jakarta.apache.org/commons/
|
||||
commons-lang3-3.8.1.jar http://jakarta.apache.org/commons/
|
||||
commons-lang3-3.9.jar http://jakarta.apache.org/commons/
|
||||
mybatis-3.3.0.jar http://www.mybatis.org/
|
||||
chemistry-opencmis-commons-impl-1.1.0.jar http://chemistry.apache.org/
|
||||
chemistry-opencmis-commons-api-1.1.0.jar http://chemistry.apache.org/
|
||||
|
@@ -3,11 +3,10 @@
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.alfresco</groupId>
|
||||
<artifactId>alfresco-super-pom</artifactId>
|
||||
<version>10</version>
|
||||
<artifactId>alfresco-search-and-insight-parent</artifactId>
|
||||
<version>1.4.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<artifactId>alfresco-search-parent</artifactId>
|
||||
<version>1.4.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>Alfresco Solr Search parent</name>
|
||||
<properties>
|
||||
@@ -18,22 +17,6 @@
|
||||
<solr.zip>https://artifacts.alfresco.com/nexus/content/repositories/public/org/apache/solr/solr/solr-${solr.version}-patched/solr-solr-${solr.version}-patched.zip</solr.zip>
|
||||
<solr.directory>${project.build.directory}/solr-${solr.version}-patched</solr.directory>
|
||||
</properties>
|
||||
<distributionManagement>
|
||||
<repository>
|
||||
<id>alfresco-releases</id>
|
||||
<url>https://artifacts.alfresco.com/nexus/content/repositories/releases/</url>
|
||||
</repository>
|
||||
<snapshotRepository>
|
||||
<id>alfresco-snapshots</id>
|
||||
<url>https://artifacts.alfresco.com/nexus/content/repositories/snapshots/</url>
|
||||
</snapshotRepository>
|
||||
</distributionManagement>
|
||||
<scm>
|
||||
<connection>scm:git:git@github.com:Alfresco/SearchServices.git</connection>
|
||||
<developerConnection>scm:git:git@github.com:Alfresco/SearchServices.git</developerConnection>
|
||||
<url>https://github.com/Alfresco/SearchServices.git</url>
|
||||
<tag>HEAD</tag>
|
||||
</scm>
|
||||
<modules>
|
||||
<module>alfresco-solrclient-lib</module>
|
||||
<module>alfresco-search</module>
|
||||
|
Reference in New Issue
Block a user