diff --git a/pom.xml b/pom.xml
index cbc7355f65..de2d5eea23 100644
--- a/pom.xml
+++ b/pom.xml
@@ -143,7 +143,7 @@ LICENSE file.
     6.2.2
     1.10.20
     1.4.1
-    3.10.1-scylla-0
+    3.10.2-scylla-1
     7.7.2
     1.6.5
     0.8.0
diff --git a/scylla/README.md b/scylla/README.md
index 2d6e354fbc..ac5a27da60 100644
--- a/scylla/README.md
+++ b/scylla/README.md
@@ -53,7 +53,6 @@ Create a keyspace, and a table as mentioned above. Load data with:
     -p readproportion=0 -p updateproportion=0 \
     -p fieldcount=10 -p fieldlength=128 \
     -p insertstart=0 -p insertcount=1000000000 \
-    -p cassandra.coreconnections=14 -p cassandra.maxconnections=14 \
     -p cassandra.username=cassandra -p cassandra.password=cassandra \
     -p scylla.hosts=ip1,ip2,ip3,...
 
@@ -63,27 +62,32 @@ Use as following:
     -target 120000 -threads 840 -p recordcount=1000000000 \
     -p fieldcount=10 -p fieldlength=128 \
     -p operationcount=50000000 \
-    -p scylla.coreconnections=280 -p scylla.maxconnections=280 \
     -p scylla.username=cassandra -p scylla.password=cassandra \
-    -p scylla.hosts=ip1,ip2,ip3,... \
-    -p scylla.tokenaware=true
+    -p scylla.hosts=ip1,ip2,ip3,...
 
 ## On choosing meaningful configuration
 
 ### 1. Load target
 
-You want to test how a database handles load. To get the performance picture
-you would want to look at the latency distribution and utilization under the
-constant load. To select load use `-target` to state desired throughput level.
+Suppose you want to test how a database handles an OLTP load.
+
+In this case, to get the performance picture you want to look at the latency
+distribution and utilization at a sustained throughput that is independent
+of the processing speed. This kind of system is called an open-loop system.
+Use the `-target` flag to state the desired request arrival rate.
 
 For example `-target 120000` means that we expect YCSB workers to generate
-120,000 requests per second (RPS, QPS or TPS) to the database.
+120,000 requests per second (RPS, QPS or TPS) overall to the database.
 
-Why is this important? Because without setting target throughput you will be
-looking only on the system equilibrium point that in the face of constantly
-varying latency will not allow you to see either throughput, nor latency.
+Why is this important? First, we want to look at the latency at some sustained
+throughput target, not vice versa. Second, without a throughput target, the
+system and the loader converge to a closed-loop system that has completely
+different characteristics from what we wanted to measure. The load settles at
+the system equilibrium point: the throughput you find there depends on the
+number of loader threads (workers), and you measure only service time, not
+latency. This is not what we set out to measure.
 
-For more information check out these resources on the coordinated omission problem:
+For more information check out these resources on the coordinated omission problem.
 
 See
 [[1]](http://highscalability.com/blog/2015/10/5/your-load-generator-is-probably-lying-to-you-take-the-red-pi.html)
@@ -92,7 +96,51 @@ See
 and
 [[this]](https://www.youtube.com/watch?v=lJ8ydIuPFeU)
 great talk by Gil Tene.
 
-### 2. Parallelism factor and threads
+### 2. Latency correction
+
+To measure latency, it is not enough to just set a target.
+Latencies must be measured with a correction, because we apply
+a closed-loop loader to an open-loop problem. This is what YCSB
+calls an intended operation.
+
+Intended operations have points in time at which they were intended to be
+executed according to the schedule defined by the load target (`-target`).
+We must correct the measurement if we did not manage to execute an operation
+on time.
+
+A fair measurement consists of the operation latency and its correction
+back to the point of its intended execution. Even if you don't want
+a completely fair measurement, use "both":
+
+    -p measurement.interval=both
+
+Other options are "op" and "intended". "op" is the default.
+
+Another flag that affects measurement quality is the histogram type,
+`-p measurementtype`. It has defaulted to "hdrhistogram" for a long time,
+which should be fine for most use cases.
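+
+To make intended operations concrete, below is a minimal sketch of an
+open-loop schedule with corrected measurement. It is illustrative only,
+not YCSB's actual implementation; `executeOperation` is a placeholder:
+
+    import java.util.concurrent.locks.LockSupport;
+
+    final class IntendedOpsSketch {
+      static void run(long targetOpsPerSec, long totalOps, Runnable executeOperation) {
+        final long intervalNanos = 1_000_000_000L / targetOpsPerSec;
+        final long startNanos = System.nanoTime();
+        for (long k = 0; k < totalOps; k++) {
+          // Operation k is intended to start at a fixed point in time,
+          // no matter how long the previous operations actually took.
+          final long intendedStartNanos = startNanos + k * intervalNanos;
+          while (System.nanoTime() < intendedStartNanos) {
+            LockSupport.parkNanos(intendedStartNanos - System.nanoTime());
+          }
+          executeOperation.run();
+          // The corrected ("intended") latency includes the time spent
+          // waiting behind slow predecessors, not just the service time.
+          final long intendedLatencyNanos = System.nanoTime() - intendedStartNanos;
+          // record intendedLatencyNanos in a histogram here
+        }
+      }
+    }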
+
+### 3. Latency percentiles and multiple loaders
+
+Latency percentiles can't be averaged. Don't fall into this trap:
+neither averaged latencies nor averaged p99s make any sense.
+
+If you run a single loader instance, look at P99, the 99th percentile.
+If you run multiple loaders, dump the resulting histograms with:
+
+    -p measurement.histogram.verbose=true
+
+or
+
+    -p hdrhistogram.fileoutput=true
+    -p hdrhistogram.output.path=file.hdr
+
+merge them manually, and extract the required percentiles from the
+joined result.
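+
+Assuming the dumped files are in HdrHistogram's interval log format, the
+merge can be done with the HdrHistogram library along these lines (a sketch;
+the file names are hypothetical):
+
+    import java.io.File;
+    import org.HdrHistogram.Histogram;
+    import org.HdrHistogram.HistogramLogReader;
+
+    final class MergeHdrSketch {
+      public static void main(String[] args) throws Exception {
+        // Merge the per-loader histograms, then extract percentiles from
+        // the merged data instead of averaging per-loader percentiles.
+        final Histogram merged = new Histogram(3);
+        for (String path : new String[]{"loader1.hdr", "loader2.hdr"}) {
+          final HistogramLogReader reader = new HistogramLogReader(new File(path));
+          while (reader.hasNext()) {
+            merged.add((Histogram) reader.nextIntervalHistogram());
+          }
+        }
+        System.out.println("p99 = " + merged.getValueAtPercentile(99.0));
+      }
+    }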
+
+Remember that running multiple loaders may distort the distributions
+the original workloads were intended to produce.
+
+### 4. Parallelism factor and threads
 
 Scylla utilizes [thread-per-core](https://www.scylladb.com/product/technology/) architecture design. That means that a Node consists of shards that are mapped to the CPU cores 1-per-core.
@@ -113,41 +161,45 @@ of shards, and the number of nodes in the cluster. For example:
 
     =>
 
-    threads = K * shards * nodes = K * 14 * nodes
+    threads = K * shards per node * nodes
 
 for i3.4xlarge where
 
-  - K is parallelism factor >= 1,
-  - Nodes is number of nodes in the cluster.
-
-For example for 3 nodes `i3.4xlarge` and `-threads 840` means
-`K = 20`, `shards = 14`, and `threads = 14 * 20 * 3`.
+  - K is the parallelism factor:
-Thus, the `K` - the parallelism factor must be selected in the first order. If you
-don't know what you want out of it start with 1.
+        K >= Target Throughput / QPS per Worker / Shards per Node / Nodes / Workers per Shard >= 1
+        where
+          Target Throughput = the `-target` rate
+          QPS per Worker = 1000 [ms/second] / latency in ms expected at the target percentile
+          Shards per Node = vCPU per cluster node - 2
+          Nodes = the number of nodes in the cluster
+          Workers per Shard = Target Throughput / Shards per Node / Nodes / QPS per Worker
-For picking desired parallelism factor it is useful to come from desired `target`
-parameter. It is better if the `target` is a multiple of `threads`.
+  - Nodes is the number of nodes in the cluster.
 
 Another concern is that for high throughput scenarios you would probably want
 to keep shards incoming queues non-empty. For that your parallelism factor
 must be at least 2.
 
-### 3. Number of connections
+### 5. Number of connections
 
-Both `scylla.coreconnections` and `scylla.maxconnections` define limits
-per node. When you see `-p scylla.coreconnections=280 -p scylla.maxconnections=280`
-that means 280 connections per node.
+If you use the original Cassandra drivers, you need to pick a proper number
+of connections per host. Scylla drivers do not require this to be configured
+and by default create one connection per shard. For example, if your node has
+16 vCPU and thus 14 shards, the Scylla drivers will create 14 connections
+per host. An excess of connections may result in degraded latency.
 
-Number of connections must be a multiple of:
+The database client protocol is asynchronous and allows queueing requests in
+a single connection. The default queue limit per connection is 1024 requests
+for local nodes and 256 for remote ones; the binding does not need this tuned.
 
-- number of _shards_
-- parallelism factor `K`
+Both `scylla.coreconnections` and `scylla.maxconnections` define limits per node.
+When you see `-p scylla.coreconnections=14 -p scylla.maxconnections=14` that means
+14 connections per node.
 
-For example, for `i3.4xlarge` that has 14 shards per node and `K = 20`
-it makes sense to pick `connections = shards * K = 14 * 20 = 280`.
+Pick the number of connections per host to be divisible by the number of _shards_.
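+
+If you do need to tune the plain Cassandra driver, these knobs map to the
+driver's `PoolingOptions` roughly as follows. This is a sketch, not the
+binding's actual code, and the contact point is hypothetical:
+
+    import com.datastax.driver.core.Cluster;
+    import com.datastax.driver.core.HostDistance;
+    import com.datastax.driver.core.PoolingOptions;
+
+    final class PoolingSketch {
+      public static void main(String[] args) {
+        final PoolingOptions pooling = new PoolingOptions()
+            // 14 connections per host, e.g. one per shard of a 14-shard node.
+            .setCoreConnectionsPerHost(HostDistance.LOCAL, 14)
+            .setMaxConnectionsPerHost(HostDistance.LOCAL, 14)
+            // The driver defaults, shown explicitly: up to 1024 in-flight
+            // requests per connection to local nodes, 256 to remote ones.
+            .setMaxRequestsPerConnection(HostDistance.LOCAL, 1024)
+            .setMaxRequestsPerConnection(HostDistance.REMOTE, 256);
+        final Cluster cluster = Cluster.builder()
+            .addContactPoint("ip1")
+            .withPoolingOptions(pooling)
+            .build();
+        cluster.close();
+      }
+    }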
 
-### 4. Other considerations
+### 6. Other considerations
 
 Consistency levels do not change consistency model or its strongness. Even
 with `-p scylla.writeconsistencylevel=ONE` the data will be written
@@ -159,16 +211,13 @@ latency picture a bit but would not affect utilization.
 Remember that you can't measure CPU utilization with Scylla by normal
 Unix tools. Check out Scylla own metrics to see real reactors utilization.
 
-Always use [token aware](https://www.scylladb.com/2019/03/27/best-practices-for-scylla-applications/)
-load balancing `-p scylla.tokenaware=true`.
-
 For best performance it is crucial to evenly load all available shards.
 
-### 5. Expected performance target
+### 7. Expected performance target
 
 You can expect about 12500 uOPS / core (shard), where uOPS are basic reads
 and writes operations post replication. Don't forget that usually
-`Core = 2 * vCPU` for HT systems.
+`1 Core = 2 vCPU` for HT systems.
 
 For example if we insert a row with RF = 3 we can count at least
 3 writes - 1 write per each replica. That is 1 Transaction = 3 u operations.
@@ -235,12 +284,9 @@ of 3 nodes of i3.4xlarge (16 vCPU per node) and target of 120000 is:
 * Default is false
 * https://docs.scylladb.com/using-scylla/tracing/
 
-- `scylla.tokenaware`
-  - Enable token awareness
-  - Default value is false.
-
-- `scylla.tokenaware_local_dc`
-  - Restrict Round Robin child policy with the local dc nodes
+- `scylla.local_dc`
+  - Specify the local datacenter for a multi-DC setup.
+  - When set, read and write consistency levels default to LOCAL_QUORUM unless overridden.
   - Default value is empty.
 
 - `scylla.lwt`
diff --git a/scylla/src/main/java/site/ycsb/db/scylla/ScyllaCQLClient.java b/scylla/src/main/java/site/ycsb/db/scylla/ScyllaCQLClient.java
index 2dae5a0694..f37d260293 100644
--- a/scylla/src/main/java/site/ycsb/db/scylla/ScyllaCQLClient.java
+++ b/scylla/src/main/java/site/ycsb/db/scylla/ScyllaCQLClient.java
@@ -16,10 +16,10 @@
 package site.ycsb.db.scylla;
 
 import com.datastax.driver.core.*;
+import com.datastax.driver.core.policies.DCAwareRoundRobinPolicy;
 import com.datastax.driver.core.policies.LoadBalancingPolicy;
-import com.datastax.driver.core.querybuilder.*;
 import com.datastax.driver.core.policies.TokenAwarePolicy;
-import com.datastax.driver.core.policies.DCAwareRoundRobinPolicy;
+import com.datastax.driver.core.querybuilder.*;
 import site.ycsb.ByteArrayByteIterator;
 import site.ycsb.ByteIterator;
 import site.ycsb.DB;
@@ -86,9 +86,7 @@ public class ScyllaCQLClient extends DB {
 
   public static final String SCYLLA_LWT = "scylla.lwt";
 
-  public static final String TOKEN_AWARE = "scylla.tokenaware";
-  public static final String TOKEN_AWARE_DEFAULT = "false";
-  public static final String TOKEN_AWARE_LOCAL_DC = "scylla.tokenaware_local_dc";
+  public static final String TOKEN_AWARE_LOCAL_DC = "scylla.local_dc";
 
   public static final String TRACING_PROPERTY = "scylla.tracing";
   public static final String TRACING_PROPERTY_DEFAULT = "false";
@@ -162,17 +160,22 @@ public void init() throws DBException {
           .addContactPoints(hosts);
     }
 
-    if (Boolean.parseBoolean(getProperties().getProperty(TOKEN_AWARE, TOKEN_AWARE_DEFAULT))) {
-      LoadBalancingPolicy child;
-      String localDc = getProperties().getProperty(TOKEN_AWARE_LOCAL_DC);
-      if (localDc != null && !localDc.isEmpty()) {
-        child = DCAwareRoundRobinPolicy.builder().withLocalDc(localDc).build();
-        LOGGER.info("Using shard awareness with local DC: {}\n", localDc);
-      } else {
-        child = DCAwareRoundRobinPolicy.builder().build();
-        LOGGER.info("Using shard awareness\n");
+    final String localDC = getProperties().getProperty(TOKEN_AWARE_LOCAL_DC);
+    if (localDC != null && !localDC.isEmpty()) {
+      // Wrap a DC-local round-robin policy with token awareness so requests
+      // are routed to replicas in the configured local datacenter.
+      final LoadBalancingPolicy local = DCAwareRoundRobinPolicy.builder().withLocalDc(localDC).build();
+      final TokenAwarePolicy tokenAware = new TokenAwarePolicy(local);
+      builder = builder.withLoadBalancingPolicy(tokenAware);
+
+      LOGGER.info("Using local datacenter with token awareness: {}\n", localDC);
+
+      // If not overridden explicitly, default to LOCAL_QUORUM
+      if (getProperties().getProperty(READ_CONSISTENCY_LEVEL_PROPERTY) == null) {
+        readConsistencyLevel = ConsistencyLevel.LOCAL_QUORUM;
+      }
+
+      if (getProperties().getProperty(WRITE_CONSISTENCY_LEVEL_PROPERTY) == null) {
+        writeConsistencyLevel = ConsistencyLevel.LOCAL_QUORUM;
       }
-      builder = builder.withLoadBalancingPolicy(new TokenAwarePolicy(child));
     }
 
     cluster = builder.build();
@@ -224,6 +227,10 @@ public void init() throws DBException {
     } else {
       LOGGER.info("Not using LWT\n");
     }
+
+    LOGGER.info("Read consistency: {}, Write consistency: {}\n",
+        readConsistencyLevel.name(),
+        writeConsistencyLevel.name());
   } catch (Exception e) {
     throw new DBException(e);
   }
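
When `scylla.local_dc` is set, the `init()` change above builds the same
policy chain as this standalone driver snippet (a sketch for illustration;
the contact point and datacenter name are hypothetical):

    import com.datastax.driver.core.Cluster;
    import com.datastax.driver.core.policies.DCAwareRoundRobinPolicy;
    import com.datastax.driver.core.policies.TokenAwarePolicy;

    final class LocalDcWiringSketch {
      public static void main(String[] args) {
        // Token-aware routing on top of a DC-local round-robin child policy:
        // requests go to replicas of the partition key, preferring nodes in
        // the configured local datacenter.
        final Cluster cluster = Cluster.builder()
            .addContactPoint("ip1")
            .withLoadBalancingPolicy(new TokenAwarePolicy(
                DCAwareRoundRobinPolicy.builder().withLocalDc("dc1").build()))
            .build();
        cluster.close();
      }
    }

Unless `scylla.readconsistencylevel` or `scylla.writeconsistencylevel` is set
explicitly, the binding then defaults both levels to LOCAL_QUORUM, matching
the new log line in `init()`.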