Reviewer notes (text before the first "diff --git" line is not part of any hunk)
--------------------------------------------------------------------------------
* Cassandra: renames the admin key :flush-compact to :flush and moves the
  --nemesis / --admin CLI options out of cassandra-opt-spec into
  nemesis-opt-spec / admin-opt-spec.
* ScalarDB: adds a PostgreSQL backend (scalardb.db.postgres, ExtPostgres,
  "--db postgres") that is limited to one node by gen-db.
* Correction to the submitted patch: scalardb.db.postgres/wipe! passed the
  version string to get-log-path, which destructures :version from a map, so
  the default version's log path was always used. wipe! now passes the whole
  map. The new-file hunk is 138 lines instead of 136 for that reason.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy; apply with --ignore-whitespace if context lines do not match.

diff --git a/cassandra/src/cassandra/nemesis.clj b/cassandra/src/cassandra/nemesis.clj
index 65a2b91..4f16d1a 100644
--- a/cassandra/src/cassandra/nemesis.clj
+++ b/cassandra/src/cassandra/nemesis.clj
@@ -199,7 +199,7 @@
 
 (defn flush-generator
   [opts]
-  (when (contains? (:admin opts) :flush-compact)
+  (when (contains? (:admin opts) :flush)
     (->> (gen/mix [(repeat {:type :info, :f :flush})
                    (repeat {:type :info, :f :compact})])
          (gen/stagger default-interval))))
@@ -207,7 +207,7 @@
 (defn flush-package
   "A combined nemesis package for flush and compaction."
   [opts]
-  (when (contains? (:admin opts) :flush-compact)
+  (when (contains? (:admin opts) :flush)
     {:nemesis (flush-nemesis)
      :generator (flush-generator opts)
      :perf #{{:name "flush"
diff --git a/cassandra/src/cassandra/runner.clj b/cassandra/src/cassandra/runner.clj
index 3211214..6f50f2d 100644
--- a/cassandra/src/cassandra/runner.clj
+++ b/cassandra/src/cassandra/runner.clj
@@ -42,21 +42,13 @@
 (def admin
   {"none" []
    "join" [:join]
-   "flush" [:flush-compact]})
+   "flush" [:flush]})
 
 (def test-opt-spec
   [(cli/repeated-opt nil "--workload NAME" "Test(s) to run" [] workload-keys)])
 
 (def cassandra-opt-spec
-  [(cli/repeated-opt nil "--nemesis NAME" "Which nemeses to use"
-                     [[]]
-                     nemeses)
-
-   (cli/repeated-opt nil "--admin NAME" "Which admin operations to use"
-                     [[]]
-                     admin)
-
-   [nil "--rf REPLICATION_FACTOR" "Replication factor"
+  [[nil "--rf REPLICATION_FACTOR" "Replication factor"
     :default 3
     :parse-fn #(Long/parseLong %)
     :validate [pos? "Must be positive"]]
@@ -66,6 +58,16 @@
 
    (cli/tarball-opt link-to-tarball)])
 
+(def nemesis-opt-spec
+  [(cli/repeated-opt nil "--nemesis NAME" "Which nemeses to use"
+                     [[]]
+                     nemeses)])
+
+(def admin-opt-spec
+  [(cli/repeated-opt nil "--admin NAME" "Which admin operations to use"
+                     [[]]
+                     admin)])
+
 (defn cassandra-test
   [opts]
   (let [target (:target opts)
@@ -108,6 +110,8 @@
   []
   {"test" {:opt-spec (->> test-opt-spec
                           (into cassandra-opt-spec)
+                          (into nemesis-opt-spec)
+                          (into admin-opt-spec)
                           (into cli/test-opt-spec))
            :opt-fn (fn [parsed] (-> parsed cli/test-opt-fn))
            :usage (cli/test-usage)
diff --git a/cassandra/test/cassandra/runner_test.clj b/cassandra/test/cassandra/runner_test.clj
index d485e19..5248c2d 100644
--- a/cassandra/test/cassandra/runner_test.clj
+++ b/cassandra/test/cassandra/runner_test.clj
@@ -6,7 +6,7 @@
   (let [opts {:target "cassandra"
               :workload :batch
               :nemesis [:crash]
-              :admin [:flush-compact]
+              :admin [:flush]
               :time-limit 60}
         test (runner/cassandra-test opts)]
-    (is (= "cassandra-batch-crash-flush-compact" (:name test)))))
+    (is (= "cassandra-batch-crash-flush" (:name test)))))
diff --git a/docker/node/Dockerfile b/docker/node/Dockerfile
index 9628f20..e321f9f 100644
--- a/docker/node/Dockerfile
+++ b/docker/node/Dockerfile
@@ -7,6 +7,8 @@ RUN apt-get -y -q update && \
     psmisc \
     python \
     ntpdate \
+    gnupg \
+    iproute2 \
     iptables
 
 ADD ./init.sh /init.sh
diff --git a/scalardb/src/scalardb/db/postgres.clj b/scalardb/src/scalardb/db/postgres.clj
new file mode 100644
index 0000000..3823478
--- /dev/null
+++ b/scalardb/src/scalardb/db/postgres.clj
@@ -0,0 +1,138 @@
+(ns scalardb.db.postgres
+  (:require [clojure.tools.logging :refer [info]]
+            [jepsen
+             [control :as c]
+             [db :as db]
+             [util :refer [meh]]]
+            [jepsen.control.util :as cu]
+            [jepsen.os.debian :as debian]))
+
+(def ^:private ^:const DEFAULT_VERSION "15")
+(def ^:private ^:const TIMEOUT_SEC 600)
+(def ^:private ^:const INTERVAL_SEC 10)
+
+(defn- install!
+  "Installs PostgreSQL."
+  [{:keys [version] :or {version DEFAULT_VERSION}}]
+  (let [postgre (keyword (str "postgresql-" version))
+        client (keyword (str "postgresql-client-" version))]
+    (c/su
+     (c/exec :wget
+             :--quiet
+             :-O
+             :- "https://www.postgresql.org/media/keys/ACCC4CF8.asc"
+             c/| :apt-key :add :-)
+     (debian/install [:lsb-release])
+     (let [release (c/exec :lsb_release :-cs)]
+       (debian/add-repo! "postgresql"
+                         (str "deb http://apt.postgresql.org/pub/repos/apt/ "
+                              release "-pgdg main")))
+     (debian/install [postgre client])
+     (c/su (c/exec :sed :-i
+                   (c/lit "\"s/#listen_addresses = 'localhost'/listen_addresses = '*'/g\"")
+                   (str "/etc/postgresql/" version "/main/postgresql.conf")))
+     (c/su (c/exec :echo
+                   (c/lit "host all all 0.0.0.0/0 trust")
+                   c/| :tee :-a
+                   (str "/etc/postgresql/" version "/main/pg_hba.conf")
+                   :> "/dev/null"))
+     (c/su (meh (c/exec :service :postgresql :stop)))
+     (c/exec "update-rc.d" :postgresql :disable))))
+
+(defn- get-bin-dir
+  [version]
+  (str "/usr/lib/postgresql/" version "/bin"))
+
+(defn- get-main-dir
+  [version]
+  (str "/var/lib/postgresql/" version "/main"))
+
+(defn- configure!
+  [{:keys [version] :or {version DEFAULT_VERSION}}]
+  (c/sudo "postgres"
+          (c/exec (str (get-bin-dir version) "/initdb")
+                  :-D (get-main-dir version))))
+
+(defn- get-log-path
+  [{:keys [version] :or {version DEFAULT_VERSION}}]
+  (str "/var/log/postgresql/postgresql-" version "-main.log"))
+
+(defn- start!
+  []
+  (c/su (c/exec :service :postgresql :start)))
+
+(defn- stop!
+  []
+  (c/su (meh (c/exec :service :postgresql :stop))))
+
+(defn- wipe!
+  "Stops PostgreSQL, then removes its data directory and log file."
+  [{:keys [version] :or {version DEFAULT_VERSION} :as opts}]
+  (stop!)
+  (c/su (meh (c/exec :rm :-r (get-main-dir version))))
+  ;; get-log-path destructures :version from a map, so hand it the whole map.
+  (c/su (meh (c/exec :rm (get-log-path opts)))))
+
+(defn live-node?
+  [test]
+  (let [node (-> test :nodes first)]
+    (try
+      (c/on node (c/sudo "postgres" (c/exec :pg_isready)))
+      true
+      (catch Exception _
+        (info node "is down")
+        false))))
+
+(defn wait-for-recovery
+  "Wait for the node bootstrapping."
+  ([test]
+   (wait-for-recovery TIMEOUT_SEC INTERVAL_SEC test))
+  ([timeout-sec interval-sec test]
+   (when-not (live-node? test)
+     (Thread/sleep (* interval-sec 1000))
+     (if (>= timeout-sec interval-sec)
+       (wait-for-recovery (- timeout-sec interval-sec) interval-sec test)
+       (throw (ex-info "Timed out waiting for the postgres node"
+                       {:cause "The node couldn't start"}))))))
+
+(defn db
+  "Setup PostgreSQL."
+  []
+  (reify
+    db/DB
+    (setup! [_ test _]
+      (when-not (:leave-db-running? test)
+        (wipe! test))
+      (install! test)
+      (configure! test)
+      (start!))
+
+    (teardown! [_ test _]
+      (when-not (:leave-db-running? test)
+        (wipe! test)))
+
+    db/Primary
+    (primaries [_ test] (:nodes test))
+    (setup-primary! [_ _ _])
+
+    db/Pause
+    (pause! [_ _ _]
+      (c/su (c/exec :service :postgresql :stop)))
+    (resume! [_ _ _]
+      (c/su (c/exec :service :postgresql :start)))
+
+    db/Kill
+    (start! [_ _ _]
+      (c/su (c/exec :service :postgresql :restart)))
+    (kill! [_ _ _]
+      (doseq [pattern (shuffle
+                       ["postgres -D" ; Main process
+                        "main: checkpointer"
+                        "main: background writer"
+                        "main: walwriter"
+                        "main: autovacuum launcher"])]
+        (Thread/sleep (rand-int 100))
+        (info "Killing" pattern "-" (cu/grepkill! pattern))))
+
+    db/LogFiles
+    (log-files [_ test _] [(get-log-path test)])))
diff --git a/scalardb/src/scalardb/db_extend.clj b/scalardb/src/scalardb/db_extend.clj
index 7481d9d..cbfb409 100644
--- a/scalardb/src/scalardb/db_extend.clj
+++ b/scalardb/src/scalardb/db_extend.clj
@@ -1,7 +1,8 @@
 (ns scalardb.db-extend
   (:require [cassandra.core :as cassandra]
             [clojure.string :as string]
-            [jepsen.db :as db])
+            [jepsen.db :as db]
+            [scalardb.db.postgres :as postgres])
   (:import (com.scalar.db.storage.cassandra CassandraAdmin
                                             CassandraAdmin$ReplicationStrategy
                                             CassandraAdmin$CompactionStrategy)
@@ -36,6 +37,7 @@
       (when (nil? nodes)
         (throw (ex-info "No living node" {:test test})))
       (doto (Properties.)
+        (.setProperty "scalar.db.storage" "cassandra")
         (.setProperty "scalar.db.contact_points" (string/join "," nodes))
         (.setProperty "scalar.db.username" "cassandra")
         (.setProperty "scalar.db.password" "cassandra")
@@ -44,8 +46,29 @@
         (.setProperty "scalar.db.consensus_commit.serializable_strategy"
                       ((:serializable-strategy test) SERIALIZABLE_STRATEGIES))))))
 
+(defrecord ExtPostgres []
+  DbExtension
+  (live-nodes [_ test] (postgres/live-node? test))
+  (wait-for-recovery [_ test] (postgres/wait-for-recovery test))
+  (create-table-opts [_ _] {})
+  (create-properties
+    [_ test]
+    (let [node (-> test :nodes first)]
+      ;; We have only one node in this test
+      (doto (Properties.)
+        (.setProperty "scalar.db.storage" "jdbc")
+        (.setProperty "scalar.db.contact_points"
+                      (str "jdbc:postgresql://" node ":5432/"))
+        (.setProperty "scalar.db.username" "postgres")
+        (.setProperty "scalar.db.password" "postgres")
+        (.setProperty "scalar.db.consensus_commit.isolation_level"
+                      ((:isolation-level test) ISOLATION_LEVELS))
+        (.setProperty "scalar.db.consensus_commit.serializable_strategy"
+                      ((:serializable-strategy test) SERIALIZABLE_STRATEGIES))))))
+
 (def ^:private ext-dbs
-  {:cassandra (->ExtCassandra)})
+  {:cassandra (->ExtCassandra)
+   :postgres (->ExtPostgres)})
 
 (defn extend-db
   [db db-type]
@@ -57,6 +80,12 @@
       db/Primary
       (primaries [_ test] (db/primaries db test))
       (setup-primary! [_ test node] (db/setup-primary! db test node))
+      db/Pause
+      (pause! [_ test node] (db/pause! db test node))
+      (resume! [_ test node] (db/resume! db test node))
+      db/Kill
+      (start! [_ test node] (db/start! db test node))
+      (kill! [_ test node] (db/kill! db test node))
       db/LogFiles
       (log-files [_ test node] (db/log-files db test node))
       DbExtension
diff --git a/scalardb/src/scalardb/runner.clj b/scalardb/src/scalardb/runner.clj
index 42e4cbd..5be70c4 100644
--- a/scalardb/src/scalardb/runner.clj
+++ b/scalardb/src/scalardb/runner.clj
@@ -2,13 +2,15 @@
   (:gen-class)
   (:require [cassandra.core :as cassandra]
             [cassandra.nemesis :as cn]
-            [cassandra.runner :as car]
+            [cassandra.runner :as cr]
+            [clojure.tools.logging :refer [warn]]
             [clojure.string :as string]
             [jepsen
              [core :as jepsen]
              [cli :as cli]
              [generator :as gen]
              [tests :as tests]]
+            [jepsen.nemesis [combined :as jn]]
             [scalardb
              [core :refer [INITIAL_TABLE_ID]]
              [transfer]
@@ -19,16 +21,26 @@
              [transfer-append-2pc]
              [elle-append-2pc]
              [elle-write-read-2pc]
-             [db-extend :refer [extend-db]]]))
+             [db-extend :refer [extend-db]]]
+            [scalardb.db
+             [postgres :as postgres]]))
 
 (def db-keys
   "The map of test DBs."
-  {"cassandra" :cassandra})
+  {"cassandra" :cassandra
+   "postgres" :postgres})
 
 (defn- gen-db
+  "Returns [extended-db constructed-nemesis num-max-nodes]."
   [db-key faults admin]
   (case db-key
-    :cassandra (let [db (extend-db (cassandra/db) :cassandra)]
+    :cassandra (let [db (extend-db (cassandra/db) :cassandra)
+                     ;; replace :kill nemesis with :crash for Cassandra
+                     faults (mapv #(if (= % :kill) :crash %) faults)]
+                 (when-not (every? #(some? (get cr/nemeses (name %))) faults)
+                   (throw
+                    (ex-info
+                     (str "Invalid nemesis for Cassandra: " faults) {})))
                  [db
                   (cn/nemesis-package
                    {:db db
@@ -38,7 +50,20 @@
                                           :primaries
                                           :majority
                                           :majorities-ring
-                                          :minority-third]}})])
+                                          :minority-third]}})
+                  Integer/MAX_VALUE])
+    :postgres (let [db (extend-db (postgres/db) :postgres)]
+                (when (seq admin)
+                  (warn "The admin operations are ignored:" admin))
+                [db
+                 (jn/nemesis-package
+                  {:db db
+                   :interval 60
+                   :faults faults
+                   :partition {:targets [:one]}
+                   :kill {:targets [:one]}
+                   :pause {:targets [:one]}})
+                 1])
     (throw (ex-info "Unsupported DB" {:db db-key}))))
 
 (def workload-keys
@@ -63,12 +88,25 @@
    :elle-append-2pc scalardb.elle-append-2pc/workload
    :elle-write-read-2pc scalardb.elle-write-read-2pc/workload})
 
+(def nemeses
+  "A map of nemeses."
+  {"none" []
+   "partition" [:partition]
+   "packet" [:packet]
+   "clock" [:clock]
+   "crash" [:kill]
+   "pause" [:pause]})
+
 (def test-opt-spec
   [(cli/repeated-opt nil "--db NAME" "DB(s) on which the test is run"
                      [:cassandra] db-keys)
 
    (cli/repeated-opt nil "--workload NAME" "Test(s) to run" [] workload-keys)
 
+   (cli/repeated-opt nil "--nemesis NAME" "Which nemeses to use"
+                     [[]]
+                     nemeses)
+
    [nil "--isolation-level ISOLATION_LEVEL" "isolation level"
     :default :snapshot
     :parse-fn keyword
@@ -104,11 +142,12 @@
 
 (defn scalardb-test
   [base-opts db-key workload-key faults admin]
-  (let [[db nemesis] (gen-db db-key faults admin)
+  (let [[db nemesis max-nodes] (gen-db db-key faults admin)
         consistency-model (->> base-opts :consistency-model (mapv keyword))
         workload-opts (merge base-opts
                              scalardb-opts
-                             {:consistency-model consistency-model})
+                             {:nodes (vec (take max-nodes (:nodes base-opts)))
+                              :consistency-model consistency-model})
         workload ((workload-key workloads) workload-opts)]
     (merge tests/noop-test
            workload-opts
@@ -131,7 +170,8 @@
 (defn test-cmd
   []
   {"test" {:opt-spec (->> test-opt-spec
-                          (into car/cassandra-opt-spec)
+                          (into cr/cassandra-opt-spec)
+                          (into cr/admin-opt-spec)
                           (into cli/test-opt-spec))
            :opt-fn (fn [parsed] (-> parsed cli/test-opt-fn))
            :usage (cli/test-usage)