From c6507ab563b19f01aece0c1c2ef6723ee5a8d345 Mon Sep 17 00:00:00 2001 From: Oliver Howell Date: Fri, 28 Jun 2024 15:32:52 +0100 Subject: [PATCH] DOC-65: 1st pass at Platform nav (#1133) Merge 1st pass and review --- antora-playbook-local.yml | 6 +- docs/antora.yml | 4 +- docs/modules/ROOT/nav.adoc | 230 +++- .../modules/ROOT/pages/capacity-planning.adoc | 76 -- docs/modules/ROOT/pages/index.adoc | 2 +- .../modules/ROOT/pages/management-center.adoc | 6 + docs/modules/ROOT/pages/placeholder.adoc | 3 + .../modules/ROOT/pages/what-is-hazelcast.adoc | 147 ++ docs/modules/ROOT/pages/whats-new.adoc | 66 + docs/modules/clients/partials/nav.adoc | 12 +- .../pages/aws-deployments.adoc | 16 + .../pages/back-pressure.adoc | 74 + .../pages/best-practices.adoc | 1192 +---------------- .../cluster-performance/pages/near-cache.adoc | 655 +++++++++ .../pages/performance-tips.adoc | 714 ++++++++++ .../cluster-performance/pages/pipelining.adoc | 61 + .../cluster-performance/pages/threading.adoc | 338 +++++ docs/modules/clusters/pages/legacy-ucd.adoc | 6 +- .../clusters/pages/ucn-migrate-ucd.adoc | 2 +- .../clusters/partials/ucn-migrate-tip.adoc | 2 +- docs/modules/computing/partials/nav.adoc | 3 +- docs/modules/configuration/partials/nav.adoc | 1 + .../pages/data-connections-configuration.adoc | 6 +- .../pages/mongo-dc-configuration.adoc | 2 +- docs/modules/data-structures/pages/cpmap.adoc | 4 +- .../modules/data-structures/partials/nav.adoc | 1 + .../deploy/pages/enterprise-licenses.adoc | 2 +- docs/modules/deploy/partials/nav.adoc | 6 - .../getting-started/pages/blue-green.adoc | 8 +- .../getting-started/pages/editions.adoc | 24 +- .../pages/install-enterprise.adoc | 4 +- .../pages/install-hazelcast.adoc | 4 +- .../getting-started/pages/persistence.adoc | 10 +- .../getting-started/pages/support.adoc | 9 + docs/modules/getting-started/pages/wan.adoc | 4 +- .../modules/getting-started/partials/nav.adoc | 15 +- docs/modules/integrate/partials/nav.adoc | 33 +- .../pages/deploying-in-kubernetes.adoc | 2 +- docs/modules/kubernetes/partials/nav.adoc | 1 + .../pages/enterprise-rest-api.adoc | 2 +- .../maintain-cluster/pages/rest-api.adoc | 4 +- .../maintain-cluster/pages/shutdown.adoc | 14 +- .../maintain-cluster/partials/nav.adoc | 2 - .../migrate/pages/data-migration-tool.adoc | 15 +- .../migrate/pages/upgrading-from-imdg-3.adoc | 2 +- .../performance/partials/performance-nav.adoc | 11 - docs/modules/pipelines/pages/cdc.adoc | 2 +- .../modules/pipelines/pages/job-security.adoc | 2 +- .../plugins/pages/hazelcast-plugins.adoc | 7 +- docs/modules/query/partials/nav.adoc | 2 +- docs/modules/release-notes/pages/5-4-0.adoc | 336 +++++ .../modules/release-notes/pages/releases.adoc | 4 + docs/modules/serialization/partials/nav.adoc | 1 + docs/modules/spring/partials/nav.adoc | 9 +- docs/modules/sql/partials/nav.adoc | 50 +- .../pages/cpp-client-getting-started.adoc | 412 ++++++ docs/modules/tutorials/pages/cpsubsystem.adoc | 238 ++++ .../pages/csharp-client-getting-started.adoc | 389 ++++++ .../pages/go-client-getting-started.adoc | 317 +++++ ...t-platform-operator-expose-externally.adoc | 574 ++++++++ .../pages/java-client-getting-started.adoc | 435 ++++++ .../tutorials/pages/join-two-streams.adoc | 361 +++++ docs/modules/tutorials/pages/kubernetes.adoc | 374 ++++++ .../pages/nodejs-client-getting-started.adoc | 565 ++++++++ .../pages/python-client-getting-started.adoc | 419 ++++++ .../pages/stream-from-kafka-kerberos.adoc | 158 +++ .../tutorials/pages/tutorials-index.adoc | 17 + docs/modules/wan/pages/failures.adoc | 
2 +- 68 files changed, 6965 insertions(+), 1510 deletions(-) create mode 100644 docs/modules/ROOT/pages/management-center.adoc create mode 100644 docs/modules/ROOT/pages/placeholder.adoc create mode 100644 docs/modules/ROOT/pages/what-is-hazelcast.adoc create mode 100644 docs/modules/ROOT/pages/whats-new.adoc create mode 100644 docs/modules/cluster-performance/pages/aws-deployments.adoc create mode 100644 docs/modules/cluster-performance/pages/back-pressure.adoc create mode 100644 docs/modules/cluster-performance/pages/near-cache.adoc create mode 100644 docs/modules/cluster-performance/pages/performance-tips.adoc create mode 100644 docs/modules/cluster-performance/pages/pipelining.adoc create mode 100644 docs/modules/cluster-performance/pages/threading.adoc create mode 100644 docs/modules/release-notes/pages/5-4-0.adoc create mode 100644 docs/modules/release-notes/pages/releases.adoc create mode 100644 docs/modules/tutorials/pages/cpp-client-getting-started.adoc create mode 100644 docs/modules/tutorials/pages/cpsubsystem.adoc create mode 100644 docs/modules/tutorials/pages/csharp-client-getting-started.adoc create mode 100644 docs/modules/tutorials/pages/go-client-getting-started.adoc create mode 100644 docs/modules/tutorials/pages/hazelcast-platform-operator-expose-externally.adoc create mode 100644 docs/modules/tutorials/pages/java-client-getting-started.adoc create mode 100644 docs/modules/tutorials/pages/join-two-streams.adoc create mode 100644 docs/modules/tutorials/pages/kubernetes.adoc create mode 100644 docs/modules/tutorials/pages/nodejs-client-getting-started.adoc create mode 100644 docs/modules/tutorials/pages/python-client-getting-started.adoc create mode 100644 docs/modules/tutorials/pages/stream-from-kafka-kerberos.adoc create mode 100644 docs/modules/tutorials/pages/tutorials-index.adoc diff --git a/antora-playbook-local.yml b/antora-playbook-local.yml index ce3dc897c..b07be7a0f 100644 --- a/antora-playbook-local.yml +++ b/antora-playbook-local.yml @@ -1,12 +1,14 @@ site: title: Documentation url: http:localhost:5000 - start_page: hazelcast:getting-started:get-started-cli.adoc + start_page: hazelcast:ROOT:what-is-hazelcast.adoc robots: disallow keys: docsearch_id: 'QK2EAH8GB0' docsearch_api: 'ef7bd9485eafbd75d6e8425949eda1f5' docsearch_index: 'prod_hazelcast_docs' +urls: + html_extension_style: drop content: sources: - url: . 
@@ -17,7 +19,7 @@ content: start_path: docs ui: bundle: - url: https://github.com/hazelcast/hazelcast-docs-ui/releases/latest/download/ui-bundle.zip #../hazelcast-docs-ui/build/ui-bundle.zip + url: ../hazelcast-docs-ui/build/ui-bundle.zip snapshot: true asciidoc: attributes: diff --git a/docs/antora.yml b/docs/antora.yml index 0eed56763..0d6be56e8 100644 --- a/docs/antora.yml +++ b/docs/antora.yml @@ -29,7 +29,7 @@ asciidoc: page-latest-supported-python-client: '5.3.0' page-latest-supported-nodejs-client: '5.3.0' page-latest-supported-clc: '5.3.1' - open-source-product-name: 'Open Source' - enterprise-product-name: 'Enterprise' + open-source-product-name: 'Community Edition' + enterprise-product-name: 'Enterprise Edition' nav: - modules/ROOT/nav.adoc diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 4acbad02c..2d8222081 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -1,34 +1,131 @@ -.Get Started -* xref:index.adoc[What is Hazelcast Platform] -include::getting-started:partial$nav.adoc[] +.Get started +* xref:whats-new.adoc[What's new in 5.4] +* xref:what-is-hazelcast.adoc[What is Hazelcast Platform] +* xref:getting-started:editions.adoc[Available versions] +//* xref:placeholder.adoc[Feature overview] +//* xref:placeholder.adoc[How Hazelcast works] +* Start a local cluster +** xref:getting-started:get-started-docker.adoc[Docker] +** xref:getting-started:get-started-cli.adoc[CLI] +** xref:getting-started:get-started-binary.adoc[Binary] +** xref:getting-started:get-started-java.adoc[Java Embedded] +* xref:getting-started:enterprise-overview.adoc[Get started with Enterprise Edition] +** xref:getting-started:persistence.adoc[] +** xref:getting-started:authenticate-clients.adoc[] +** xref:getting-started:wan.adoc[] +** xref:getting-started:blue-green.adoc[] +* Get started with SQL +** xref:sql:get-started-sql.adoc[] +** xref:sql:learn-sql.adoc[] +** xref:sql:get-started-sql-files.adoc[] +* xref:clients:hazelcast-clients.adoc[Get started with a Hazelcast Client] +* xref:getting-started:support.adoc[] -.Develop Solutions -* Finding the Right Tool +.Install & upgrade +* Overview +** xref:deploy:choosing-a-deployment-option.adoc[Available Topologies] +** xref:deploy:versioning-compatibility.adoc[Versioning and Compatibility] +** xref:deploy:deploying-in-cloud.adoc[Deploying on Cloud] +*** xref:deploy:deploying-on-aws.adoc[] +*** xref:deploy:deploying-on-azure.adoc[] +*** xref:deploy:deploying-on-gcp.adoc[] +** xref:deploy:deploying-with-docker.adoc[] +* xref:getting-started:install-enterprise.adoc[Install {enterprise-product-name}] +** xref:deploy:enterprise-licenses.adoc[Managing Enterprise license keys] +* xref:getting-started:install-hazelcast.adoc[Install {open-source-product-name}] +// * xref:placeholder.adoc[Upgrade to Enterprise from OSS/Community] +* xref:migrate:data-migration-tool.adoc[] +* Upgrades +** xref:migrate:upgrading-from-jet.adoc[] +** xref:migrate:upgrading-from-imdg-4.adoc[] +** xref:migrate:upgrading-from-imdg-3.adoc[] +** xref:migrate:migration-tool-imdg.adoc[] +*** xref:migrate:dmt-command-reference.adoc[] +* xref:release-notes:releases.adoc[Release notes] +// * xref:placeholder.adoc[Troubleshooting] +// * xref:placeholder.adoc[FAQ] + +.Develop & build + +* Ingestion ** xref:ingest:overview.adoc[] ** xref:computing:distributed-computing.adoc[] ** xref:query:overview.adoc[] -include::clients:partial$nav.adoc[] +* Best Practices +** xref:capacity-planning.adoc[] +** xref:cluster-performance:performance-tips.adoc[] +** 
xref:cluster-performance:back-pressure.adoc[] +** xref:cluster-performance:pipelining.adoc[] +** xref:cluster-performance:aws-deployments.adoc[] +** xref:cluster-performance:threading.adoc[] +** xref:cluster-performance:near-cache.adoc[] +include::architecture:partial$nav.adoc[] +* Member/Client Discovery +** xref:clusters:discovery-mechanisms.adoc[] +** xref:clusters:discovering-by-tcp.adoc[] +** xref:clusters:discovering-by-multicast.adoc[] +** xref:clusters:discovering-native-clients.adoc[] +include::kubernetes:partial$nav.adoc[] include::data-structures:partial$nav.adoc[] -* xref:cache:overview.adoc[] -+ --- include::mapstore:partial$nav.adoc[] --- include::pipelines:partial$nav.adoc[] +* SQL +** xref:sql:sql-overview.adoc[Overview] +** SQL Over Maps +*** xref:sql:get-started-sql.adoc[Tutorial] +*** xref:sql:querying-maps-sql.adoc[Queries] +*** xref:sql:mapping-to-maps.adoc[Mappings] +** SQL Over Kafka +*** xref:sql:learn-sql.adoc[Tutorial] +*** xref:sql:mapping-to-kafka.adoc[Mappings] +** SQL Over File Systems +*** xref:sql:get-started-sql-files.adoc[Tutorial] +*** xref:sql:mapping-to-a-file-system.adoc[Mappings] +** SQL Over JDBC +*** xref:sql:mapping-to-jdbc.adoc[Mappings] +** SQL Over MongoDB +*** xref:sql:mapping-to-mongo.adoc[Mappings] +** xref:sql:working-with-json.adoc[Working with JSON] +** xref:sql:querying-streams.adoc[Stream Processing] +** xref:sql:parameterized-queries.adoc[Query Parameters] +** xref:sql:finding-mappings.adoc[Finding Mappings] +** xref:sql:improving-performance.adoc[Improving Performance] +** xref:sql:sql-reflection-configuration.adoc[Reflection Configuration] +** xref:sql:troubleshooting.adoc[Troubleshooting] +** Statements +*** xref:sql:sql-statements.adoc[Overview] +*** xref:sql:alter-job.adoc[`ALTER JOB`] +*** xref:sql:create-data-connection.adoc[`CREATE DATA CONNECTION`] +*** xref:sql:create-index.adoc[`CREATE INDEX`] +*** xref:sql:create-job.adoc[`CREATE JOB`] +*** xref:sql:create-mapping.adoc[`CREATE MAPPING`] +*** xref:sql:create-snapshot.adoc[`CREATE SNAPSHOT`] +*** xref:sql:create-view.adoc[`CREATE VIEW`] +*** xref:sql:delete.adoc[`DELETE`] +*** xref:sql:drop-data-connection.adoc[`DROP DATA CONNECTION`] +*** xref:sql:drop-job.adoc[`DROP JOB`] +*** xref:sql:drop-mapping.adoc[`DROP MAPPING`] +*** xref:sql:drop-snapshot.adoc[`DROP SNAPSHOT`] +*** xref:sql:drop-view.adoc[`DROP VIEW`] +*** xref:sql:explain.adoc[`EXPLAIN`] +*** xref:sql:sink-into.adoc[`INSERT INTO`/`SINK INTO`] +*** xref:sql:select.adoc[`SELECT`] +*** xref:sql:show-jobs.adoc[`SHOW JOBS`] +*** xref:sql:show-resources.adoc[`SHOW RESOURCES`] +*** xref:sql:show-mappings.adoc[`SHOW MAPPINGS`] +*** xref:sql:update.adoc[`UPDATE`] +** xref:sql:functions-and-operators.adoc[Functions and Expressions] +** xref:sql:data-types.adoc[] +** xref:sql:user-defined-types.adoc[] +** Optimizing SQL queries +*** xref:sql:partition-pruning.adoc[Partition Pruning] +** xref:query:predicate-overview.adoc[Predicates API] include::query:partial$nav.adoc[] * Improving Query Performance ** xref:query:indexing-maps.adoc[] ** xref:performance:caching-deserialized-values.adoc[] ** xref:data-structures:preventing-out-of-memory.adoc[] -* SQL -+ --- -include::sql:partial$nav.adoc[] --- -include::integrate:partial$nav.adoc[] -include::spring:partial$nav.adoc[] include::computing:partial$nav.adoc[] -* xref:computing:entry-processor.adoc[] * xref:clusters:user-code-namespaces.adoc[] include::clusters:partial$nav.adoc[] include::serialization:partial$nav.adoc[] @@ -37,25 +134,9 @@ include::transactions:partial$nav.adoc[] 
include::test:partial$nav.adoc[] include::troubleshoot:partial$nav.adoc[] -.Deploy Clusters -* xref:production-checklist.adoc[] -* xref:capacity-planning.adoc[] -* Installation -** xref:deploy:choosing-a-deployment-option.adoc[] -** xref:getting-started:editions.adoc[Editions and Distributions] -** xref:getting-started:install-hazelcast.adoc[{open-source-product-name}] -** xref:getting-started:install-enterprise.adoc[{enterprise-product-name}] -* Member/Client Discovery -** xref:clusters:discovery-mechanisms.adoc[] -** xref:clusters:discovering-by-tcp.adoc[] -** xref:clusters:discovering-by-multicast.adoc[] -** xref:clusters:discovering-native-clients.adoc[] -include::deploy:partial$nav.adoc[] -include::kubernetes:partial$nav.adoc[] +.Operate & manage -.Configure and Manage Clusters include::configuration:partial$nav.adoc[] -* xref:deploy:enterprise-licenses.adoc[] * xref:maintain-cluster:logging.adoc[] include::maintain-cluster:partial$nav.adoc[] * Partition Groups and Networking @@ -63,11 +144,12 @@ include::maintain-cluster:partial$nav.adoc[] ** xref:clusters:partition-group-configuration.adoc[Grouping Partitions] ** xref:clusters:network-configuration.adoc[Networking] * xref:configuration:jet-configuration.adoc[] -include::cluster-performance:partial$nav.adoc[] include::secure-cluster:partial$nav.adoc[] include::fault-tolerance:partial$nav.adoc[] +** xref:storage:high-density-memory.adoc[] include::tiered-storage:partial$nav.adoc[] include::cp-subsystem:partial$nav.adoc[] +** xref:cluster-performance:thread-per-core-tpc.adoc[] include::data-connections:partial$nav.adoc[] include::wan:partial$nav.adoc[] * xref:extending-hazelcast:extending-hazelcast.adoc[] @@ -81,25 +163,71 @@ include::wan:partial$nav.adoc[] ** xref:osgi:design.adoc[] ** xref:osgi:using-osgi-service.adoc[] -.Upgrade -include::migrate:partial$nav.adoc[] +.Integrate +* xref:integrate:connectors.adoc[Overview] +* Files +// Files need an overview (options, what's available for SQL, what's available for Jet API) +** xref:integrate:file-connector.adoc[] +** xref:integrate:legacy-file-connector.adoc[] +* Integrating with Spring +** xref:spring:overview.adoc[Overview] +** xref:spring:configuration.adoc[] +** xref:spring:springaware.adoc[] +** xref:spring:add-caching.adoc[] +** xref:spring:hibernate.adoc[] +** xref:spring:transaction-manager.adoc[] +** xref:spring:best-practices.adoc[] +* xref:integrate:kafka-connect-connectors.adoc[] +* Messaging System Connectors +** xref:integrate:messaging-system-connectors.adoc[Overview] +** xref:integrate:kafka-connector.adoc[] +** xref:integrate:kinesis-connector.adoc[] +** xref:integrate:jms-connector.adoc[] +** xref:integrate:pulsar-connector.adoc[] +* Data Structure Connectors +// Need an overview (options, what's available for SQL, what's available for JetAPI) +** xref:integrate:jcache-connector.adoc[] +** xref:integrate:list-connector.adoc[] +** xref:integrate:map-connector.adoc[] +** xref:integrate:reliable-topic-connector.adoc[] +* Databases +** xref:integrate:database-connectors.adoc[Overview] +** xref:integrate:jdbc-connector.adoc[] +** xref:integrate:cdc-connectors.adoc[] +** xref:integrate:elasticsearch-connector.adoc[] +** xref:integrate:mongodb-connector.adoc[] +** xref:integrate:influxdb-connector.adoc[] +** xref:integrate:redis-connector.adoc[] +* xref:integrate:test-connectors.adoc[] +* xref:integrate:socket-connector.adoc[] +* xref:integrate:http-connector.adoc[] +* xref:integrate:twitter-connector.adoc[] +* xref:integrate:custom-connectors.adoc[] -.Reference - 
-include::architecture:partial$nav.adoc[] - -* xref:system-properties.adoc[] - -* xref:faq.adoc[] - -* xref:list-of-metrics.adoc[Metrics] - -* xref:phone-homes.adoc[] - -* xref:compact-binary-specification.adoc[] +.Clients & APIs +* xref:clients:hazelcast-clients.adoc[Overview] +* xref:clients:java.adoc[] +* xref:clients:cplusplus.adoc[] +* xref:clients:dotnet.adoc[] +* xref:maintain-cluster:enterprise-rest-api.adoc[] +* xref:clients:memcache.adoc[] +* xref:clients:python.adoc[] +* xref:clients:nodejs.adoc[] +* xref:clients:go.adoc[] +.Tools & plugins +* xref:management-center.adoc[Management Center] +* xref:kubernetes:deploying-in-kubernetes.adoc#hazelcast-platform-operator-for-kubernetesopenshift[Platform Operator] +* xref:clients:clc.adoc[Command Line Client (CLC)] +* xref:ROOT:simulator.adoc[Simulator] * xref:plugins:hazelcast-plugins.adoc[] ** xref:plugins:cloud-discovery.adoc[] ** xref:plugins:web-session-replication.adoc[] ** xref:plugins:framework-integration.adoc[] ** xref:plugins:other-integrations.adoc[] + +.Reference +* xref:ROOT:glossary.adoc[] +* xref:system-properties.adoc[] +* xref:faq.adoc[] +* xref:list-of-metrics.adoc[Metrics] diff --git a/docs/modules/ROOT/pages/capacity-planning.adoc b/docs/modules/ROOT/pages/capacity-planning.adoc index ce1d003ac..91b6a7de3 100644 --- a/docs/modules/ROOT/pages/capacity-planning.adoc +++ b/docs/modules/ROOT/pages/capacity-planning.adoc @@ -227,82 +227,6 @@ Multi-socket clients are the smart clients which maintain a connection to each m Unisocket clients have a single connection to the entire cluster. You can find more information about these two modes here: xref:clients:java.adoc#java-client-operation-modes[Java Client Operation Modes]. -== Uniform Hardware - -Hazelcast is designed to run efficiently on homogeneous clusters. All JVM -processes that participate in the cluster should have equal CPU, memory -and network resources. One slow cluster member can kill the performance -of the whole cluster. - -=== Minimal Configuration - -Hazelcast is a lightweight framework and is reported to run well on devices -such as the Raspberry Pi Zero (1GHz single-core CPU, 512MB RAM). - -=== Recommended Configuration - -As a starting point for data-intensive operations, consider machines -such as AWS https://aws.amazon.com/ec2/instance-types/c5/[c5.2xlarge] -with: - -* 8 CPU cores -* 16 GB RAM -* 10 Gbps network - -=== CPU - -Hazelcast can use hundreds of CPU cores efficiently by exploiting data and -task parallelism. Adding more CPU can therefore help with scaling the -CPU-bound computations. If you're using jobs and pipelines, read about the -xref:architecture:distributed-computing.adoc#cooperative-execution-engine[Execution model] -to understand how Hazelcast makes the computation parallel and design your pipelines according to it. - -By default, Hazelcast uses all available CPU. Starting two Hazelcast -instances on one machine therefore doesn't bring any performance benefit -as the instances would compete for the same CPU resources. - -Don't rely just on CPU usage when benchmarking your cluster. Simulate -production workload and measure the throughput and latency instead. The -task manager of Hazelcast can be configured to use the CPU aggressively. -As an example, see https://hazelcast.com/blog/idle-green-threads-in-jet/[this benchmark]: the CPU usage was close to 20% with just 1000 events/s. At 1m items/s -the CPU usage was 100% even though Jet still could push around 5 million -items/s on that machine. 
- -=== Network - -Hazelcast uses the network internally to shuffle data and to replicate the -backups. The network is also used to read input data from and to write -results to remote systems or to do RPC calls when enriching. In fact a -lot of Hazelcast jobs are network-bound. A 1 Gbit network connection is a -recommended minimum, but using a 10 Gbit or faster network -can improve application performance. Also consider scaling the cluster -out (adding more members to the cluster) to distribute the load. - -Consider collocating a Hazelcast cluster with the data source and sink to avoid -moving data back and forth over the wire. If you must choose between colocating -Hazelcast with the source or sink, choose the source. Processed results are often -aggregated, so the size is reduced. - -A Hazelcast cluster is designed to run in a single LAN and can encounter unexpected -performance problems if a single cluster is split across multiple different networks. -Latency is the strongest constraint in most network scenarios, so deploying Hazelcast -clusters to a network with high or varying latencies (even on the same LAN) can lead -to unpredictable performance results. - -=== Disk - -Hazelcast is an in-memory framework. Cluster disks aren't involved in regular -operations except for logging and thus are not critical for the cluster -performance. There are optional features of Hazelcast (such as Persistence and -CP Persistence) which may utilize disk space, but even when they are in use a -Hazelcast system is primarily in-memory. - -Consider using more performant disks if you use the following Hazelcast features: - -* xref:pipelines:sources-sinks.adoc[The file connector] for reading or writing to files on the cluster's file system. -* xref:storage:persistence.adoc[Persistence] for saving map data to disk. -* xref:cp-subsystem:persistence.adoc[CP Persistence] for strong resiliency guarantees when using the CP Subsystem. - == Size for Failures Hazelcast clusters are elastic to deal with failures and performance spikes. diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index cadae10c9..07b068422 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -1,4 +1,4 @@ -= What is Hazelcast Platform? += What is Hazelcast Platform? (Index) :!page-pagination: :description: Hazelcast Platform uniquely combines a distributed compute engine and a fast data store in one runtime. It offers unmatched performance, resilience and scale for real-time and AI-driven applications. diff --git a/docs/modules/ROOT/pages/management-center.adoc b/docs/modules/ROOT/pages/management-center.adoc new file mode 100644 index 000000000..4febef055 --- /dev/null +++ b/docs/modules/ROOT/pages/management-center.adoc @@ -0,0 +1,6 @@ += Management Center +:description: Management Center is a tool for managing and monitoring Hazelcast Platform clusters. + +{description} + +For more information on features and getting started with Management Center, see xref:{page-latest-supported-mc}@management-center:getting-started:overview.adoc[Management Center docs]. \ No newline at end of file diff --git a/docs/modules/ROOT/pages/placeholder.adoc b/docs/modules/ROOT/pages/placeholder.adoc new file mode 100644 index 000000000..fb657448b --- /dev/null +++ b/docs/modules/ROOT/pages/placeholder.adoc @@ -0,0 +1,3 @@ += This page is a placeholder + +This page does not exist yet but is needed for the navigation. 
\ No newline at end of file
diff --git a/docs/modules/ROOT/pages/what-is-hazelcast.adoc b/docs/modules/ROOT/pages/what-is-hazelcast.adoc
new file mode 100644
index 000000000..cadae10c9
--- /dev/null
+++ b/docs/modules/ROOT/pages/what-is-hazelcast.adoc
@@ -0,0 +1,147 @@
+= What is Hazelcast Platform?
+:!page-pagination:
+:description: Hazelcast Platform uniquely combines a distributed compute engine and a fast data store in one runtime. It offers unmatched performance, resilience and scale for real-time and AI-driven applications.
+
+Hazelcast is a distributed computation and storage platform for consistently
+low-latency querying, aggregation and stateful computation against event
+streams and traditional data sources. {description} It allows you to quickly build
+resource-efficient, real-time applications. You can deploy it at any scale
+from small edge devices to a large cluster of cloud instances.
+
+Hazelcast can process data on a set of networked and
+clustered computers that pool together their random access memory (RAM) to let
+applications share data with other applications running in the cluster. When data
+is stored in RAM, applications run a lot faster because the data does not need to be
+retrieved from disk and loaded into RAM before processing. Using Hazelcast,
+you can store and process your data in RAM, and spread and replicate it across a cluster of
+machines; replication gives you resilience to failures of cluster members.
+
+Hazelcast is implemented in Java and has clients for Java, C++, .NET, REST, Python,
+Go and Node.js. Hazelcast also supports the Memcached and REST protocols.
+
+Your cloud-native applications can easily use Hazelcast.
+It is flexible enough to use as a data and computing platform out of the box
+or as a framework for your own cloud-native applications and microservices.
+
+Hazelcast is designed to be lightweight and easy to use. Since it
+is delivered as a compact library, it easily
+plugs into your software solution.
+
+It is designed to scale up to hundreds of members and thousands of clients.
+When you add new members, they automatically discover the cluster
+and linearly increase both the memory and
+processing capacity. Members maintain TCP connections to
+each other and all communication is performed through
+this layer. Each cluster member provides the same
+functionality. The oldest member (the first member created
+in the cluster) automatically assigns stored and streaming data to cluster members.
+If the oldest member dies, the second oldest member takes over.
+
+Hazelcast offers simple scalability, partitioning (sharding), and rebalancing
+out of the box. It does not require any extra coordination processes. Many NoSQL and
+traditional databases are difficult to scale out and manage because they require additional
+processes for coordination and high availability. With Hazelcast, when you start
+another process to add more capacity, data and backups are automatically and evenly balanced.
+
+== What Can You Do with Hazelcast?
+
+You can request data, listen to events, and submit data processing tasks using
+Hazelcast clients connected to a cluster. Hazelcast has clients implemented in Java,
+.NET, C++, Node.js, Go and Python. It also supports the Memcached and
+REST protocols. See the xref:clients:hazelcast-clients.adoc[Hazelcast Clients section].
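+
+As an illustration only (the cluster name, address, and map name below are placeholder values), a minimal Java client that connects to a cluster and works with a distributed map might look like this:
+
+[source,java]
+----
+import com.hazelcast.client.HazelcastClient;
+import com.hazelcast.client.config.ClientConfig;
+import com.hazelcast.core.HazelcastInstance;
+import com.hazelcast.map.IMap;
+
+public class ClientExample {
+    public static void main(String[] args) {
+        // Cluster name and address are illustrative; match them to your cluster.
+        ClientConfig config = new ClientConfig();
+        config.setClusterName("dev");
+        config.getNetworkConfig().addAddress("127.0.0.1:5701");
+        HazelcastInstance client = HazelcastClient.newHazelcastClient(config);
+
+        // Reads and writes are routed to the members that own the keys.
+        IMap<String, String> map = client.getMap("my-distributed-map");
+        map.put("key", "value");
+        System.out.println(map.get("key"));
+
+        client.shutdown();
+    }
+}
+----
+
+The other supported client languages follow the same connect-then-use pattern.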
+
+You can build data pipelines using SQL or the Java API that enable data to
+flow from an application to a data source or from a data source to an analytics database.
+A simple example is reading words from a file, converting them to uppercase,
+and writing the results to your console. See the xref:pipelines:overview.adoc[Building Data Pipelines section].
+
+You can import data from databases, files, messaging systems, on-premises and cloud systems
+in various formats (data ingestion). Hazelcast offers pipelines and loading/storing interfaces for
+this purpose. See the xref:ingest:overview.adoc[Ingesting Data section].
+
+You can run SQL queries on the data in your maps or in external systems such as Apache Kafka. You can also use
+the Predicates API to filter and retrieve data in your maps. See the xref:query:overview.adoc[Distributed Queries section].
+
+You can run computational tasks on different cluster members (distributed computing);
+for this you can use pipelines, entry processors, and executor services.
+See the xref:computing:distributed-computing.adoc[Distributed Computing section].
+
+You can store your data using the distributed implementations of various
+data structures such as maps, caches, queues, topics, and concurrency utilities.
+See the xref:data-structures:distributed-data-structures.adoc[Distributed Data Structures section].
+
+You can use Hazelcast as a distributed second-level cache for your Hibernate entities, collections and queries.
+Hazelcast can also be used as a cache for applications based on the Spring Framework to distribute
+shared data in real time.
+
+You can keep multiple Hazelcast clusters at different locations in sync
+by replicating their state over WAN environments. See the xref:wan:wan.adoc[Synchronizing Data Across a WAN section].
+
+You can listen to events happening in the cluster, on the data structures, and on clients so that
+you are notified when those events happen. See the xref:events:distributed-events.adoc[Distributed Events section].
+
+See the Develop & build section for all the scenarios you can realize using Hazelcast.
+
+The following are example use cases:
+
+* Increasing the transactions per second and system uptime in payment processing
+* Authorizing and authenticating credit cards using multiple algorithms within milliseconds for fraud detection
+* Decreasing order processing times with low latencies in e-commerce
+* Being a real-time stream processor to monitor application performance
+* Clustering rapidly changing data with event notifications, e.g., user-based events, and
+queueing and distributing background tasks
+* Being a distributed topic (publish/subscribe server) to build scalable chat servers for smartphones
+* Constructing a strongly consistent layer using its CP
+(CP with respect to the CAP theorem) subsystem built on top of the Raft consensus algorithm
+* Distributing user object states across the cluster, passing messages between objects,
+and sharing system data structures (static initialization state, mirrored objects, object
+identity generators)
+* Being a multi-tenant cache where each tenant has its own stored data
+* Sharing datasets, e.g., table-like data structures, to be used by applications
+* Storing session data in web applications (enabling horizontal scalability of the web application)
+
+Applications request stored data from data sources, e.g., databases, and these sources
+are slow because they are not designed to respond quickly enough.
+Hazelcast runs in the pooled memory (RAM) of a cluster of servers that sits
+between the applications and the data sources. It pulls data from the data
+sources into memory and serves it to the applications at in-memory speeds rather
+than disk speeds. This makes Hazelcast a low-latency solution (fast response times).
+
+Streaming data comes from sources such as files, Apache Kafka, IoT devices and
+message queues, and is produced in the moment. Hazelcast captures this data as it
+arrives and processes it on the wire. This makes Hazelcast a real-time processing platform.
+
+The unique capability of Hazelcast is its ability to process both batch and streaming data,
+with low latency and in real time, enabling transactional and analytical processing.
+
+== Architecture Overview
+
+The key components of Hazelcast are as follows:
+
+* A *member* is the computational and data storage unit in Hazelcast. Typically
+it is a JVM.
+* A Hazelcast *cluster* is a set of members communicating with each other. Members that run Hazelcast
+automatically discover one another and form a cluster at runtime.
+* *Partitions* are the memory segments that store portions of data. They are distributed evenly
+among the available cluster members. They can contain hundreds or thousands of data entries each,
+depending on the memory capacity of your system. Hazelcast also automatically creates backups of these
+partitions, which are likewise distributed across the cluster. This makes Hazelcast resilient to data loss.
+
+NOTE: _Node_ and _Member_ are interchangeable, and both mean a Java Virtual Machine (JVM) on which one or more instances of Hazelcast are in operation.
+
+Hazelcast's *streaming engine* focuses on data transformation while it does all the heavy
+lifting of getting the data flowing and computation running across a cluster of members.
+It supports working with both bounded (batch) and unbounded (streaming) data.
+
+Hazelcast's *storage engine* is the distributed, fast, operational data store that also
+handles the persistence of data.
+
+Hazelcast comes out of the box with different sources and sinks. *Sources* are where Hazelcast
+pulls data from, and *sinks* are where it outputs the processed results. Sources and sinks
+are also referred to as *connectors*. Its unified connector API provides a consistent way to
+read data across different sources. See the xref:pipelines:sources-sinks.adoc[Sources and Sinks section]
+for more information about the unified connector API and the supported sources and sinks.
diff --git a/docs/modules/ROOT/pages/whats-new.adoc b/docs/modules/ROOT/pages/whats-new.adoc
new file mode 100644
index 000000000..1be086586
--- /dev/null
+++ b/docs/modules/ROOT/pages/whats-new.adoc
@@ -0,0 +1,66 @@
+= What's New in Hazelcast Platform
+:description: Here are the highlights of what's new and improved in Hazelcast Platform.
+[[whats-new]]
+
+{description}
+
+* xref:{page-latest-supported-hazelcast}@hazelcast:ROOT:whats-new.adoc[What's New in Hazelcast Platform 5.4]
+* xref:{page-latest-supported-mc}@management-center:ROOT:whats-new.adoc[What's New in Management Center 5.4]
+
+== Use CP Map for mission-critical workflows
+[.enterprise]*Enterprise*
+
+A new CP Map data structure is the latest addition to our xref:cp-subsystem:cp-subsystem.adoc[CP Subsystem]. Unlike traditional maps built for availability (AP), CP Maps guarantee data consistency even during network partitions, ensuring that every member within the cluster sees the same data. CP Maps are ideal for storing sensitive information or managing mission-critical workflows.
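+
+A minimal sketch of basic CP Map usage follows, assuming the 5.4 Enterprise Java API (the `CPSubsystem.getMap` accessor) and a member with the CP Subsystem enabled; the map name and entries are placeholder values:
+
+[source,java]
+----
+import com.hazelcast.core.Hazelcast;
+import com.hazelcast.core.HazelcastInstance;
+import com.hazelcast.cp.CPMap;
+
+public class CPMapExample {
+    public static void main(String[] args) {
+        // Assumes an Enterprise member with the CP Subsystem enabled.
+        HazelcastInstance instance = Hazelcast.newHazelcastInstance();
+
+        // "capitals" is a placeholder map name.
+        CPMap<String, String> map = instance.getCPSubsystem().getMap("capitals");
+        map.set("UK", "London");
+        System.out.println(map.get("UK")); // prints "London"
+    }
+}
+----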
+
+The CP Subsystem is the Hazelcast component that builds a strongly consistent layer for a set of distributed data structures. As well as network partitions, the CP Subsystem withstands server and client failures. xref:management-center:cp-subsystem:dashboard.adoc[Hazelcast Management Center] allows you to inspect the CP Subsystem as a whole; for individual data record access, you can use the Hazelcast Command Line Client (CLC).
+
+For more information on how to use this new `CPMap` data structure, see xref:data-structures:cpmap.adoc[].
+
+== Manage resources effectively with User Code Namespaces
+[.enterprise]*Enterprise*
+
+User Code Namespaces provide a container for Java classpath resources, such as user code and accompanying artifacts like property files. This helps you effectively manage access to resources in different namespaces through either static or dynamic configuration. This is an Enterprise feature that extends and replaces the now deprecated xref:clusters:legacy-ucd.adoc[User Code Deployment] functionality.
+
+For more information on how to enable and configure User Code Namespaces, or migrate from User Code Deployment, see xref:clusters:user-code-namespaces.adoc[].
+
+== Connector updates
+
+**Kafka Connect Source connector** is now generally available, which means that Kafka Connect sources can scale across the whole cluster instead of just a single member. For more information, see xref:integrate:kafka-connect-connectors.adoc[].
+
+**Mongo SQL Connector (SQL over MongoDB)** is now generally available. With the addition of security support, you can secure your data connection with TLS when creating MongoDB mappings over SQL. For more information, see xref:sql:mapping-to-mongo.adoc[].
+
+**JDBC SQL Connector (SQL over JDBC)** is now generally available. In addition to MySQL and PostgreSQL, the JDBC SQL connector now also supports Oracle and Microsoft SQL Server databases. For more information, see xref:sql:mapping-to-jdbc.adoc#data-type-mapping-between-hazelcast-and-mssql[Mapping between Hazelcast and MSSQL].
+
+== Thread-Per-Core (TPC) now generally available (GA)
+[.enterprise]*Enterprise*
+
+Enable Thread-Per-Core (TPC) on both clients and cluster members to improve system performance, especially in environments where the number of CPU cores on your machines does not match the recommended hardware. TPC is an alternative to the traditional Staged Event-Driven Architecture (SEDA) that supports easier scaling based on the number of available cores.
+
+For more information on how to enable and configure TPC, see xref:cluster-performance:thread-per-core-tpc.adoc[].
+
+== Tiered Storage now generally available (GA)
+[.enterprise]*Enterprise*
+
+Tiered Storage ensures that frequently accessed (_high-hit_) data remains in fast memory and less frequently accessed (_low-hit_) data is stored on cheaper disks. This functionality is now generally available as an Enterprise feature.
+
+Tiered Storage uses an in-memory index to provide faster access to map entries. A dedicated memory manager decides which entries to store in the memory tier and which to store in the disk tier.
If a map entry is not accessed for some time, it is flushed or _spilled_ to disk; if an entry is frequently accessed, it stays in memory. Tiered Storage allows the cluster to hold more data than the total available RAM.
+
+For more information on how to enable and configure this feature, see xref:tiered-storage:overview.adoc[].
+
+== Upgrade Requirements
+
+Hazelcast Platform 5.4 requires Java 17 or later.
+
+== Release Notes
+
+For detailed release notes that include new features and enhancements, breaking changes, deprecations and other fixes, see xref:release-notes:5-4-0.adoc[5.4 Release Notes].
+
+== Hazelcast Command Line Client (CLC)
+
+You can use the Hazelcast Command Line Client (CLC) to connect to and interact with clusters on Hazelcast Platform and Hazelcast Cloud directly from the command line or through scripts.
+
+With Hazelcast CLC 5.3.7 you can now import a subset of the Java client configuration. You can also define custom template actions using advanced scripts, and add a success note when a project is created successfully.
+
+For detailed release notes that include new features and fixes, see xref:clc:ROOT:release-notes-5.3.7.adoc[Hazelcast CLC 5.3.7].
+
+To get started with Hazelcast CLC, see xref:clc:ROOT:install-clc.adoc[Installing the Hazelcast CLC].
diff --git a/docs/modules/clients/partials/nav.adoc b/docs/modules/clients/partials/nav.adoc
index 2fcab321c..8b1378917 100644
--- a/docs/modules/clients/partials/nav.adoc
+++ b/docs/modules/clients/partials/nav.adoc
@@ -1,11 +1 @@
-* Connecting to a Cluster
-** xref:clients:hazelcast-clients.adoc[]
-** xref:clients:java.adoc[]
-** xref:clients:cplusplus.adoc[]
-** xref:clients:dotnet.adoc[]
-** xref:clients:rest.adoc[]
-** xref:clients:memcache.adoc[]
-** xref:clients:python.adoc[]
-** xref:clients:nodejs.adoc[]
-** xref:clients:go.adoc[]
-** xref:clients:clc.adoc[]
\ No newline at end of file
+
diff --git a/docs/modules/cluster-performance/pages/aws-deployments.adoc b/docs/modules/cluster-performance/pages/aws-deployments.adoc
new file mode 100644
index 000000000..1687c9079
--- /dev/null
+++ b/docs/modules/cluster-performance/pages/aws-deployments.adoc
@@ -0,0 +1,16 @@
+[[aws-depl-perf]]
+= AWS Deployments
+
+When you deploy Hazelcast clusters on AWS EC2 instances, you can place the
+cluster members in the same https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#placement-groups-cluster[Cluster Placement Group]. This approach can drastically reduce latency among members.
+You can also consider using private IPs
+instead of public ones to increase the throughput when the cluster
+members are placed in the same VPC.
+
+For the best performance of Hazelcast on AWS EC2:
+
+* Select the newest Linux AMIs.
+* Select HVM-based instances.
+* Select a system with at least 8 vCPUs, for example, c4.2xlarge. For an overview of all types of
+EC2 instances, refer to https://www.ec2instances.info[this web page^].
+* Consider setting a placement group.
diff --git a/docs/modules/cluster-performance/pages/back-pressure.adoc b/docs/modules/cluster-performance/pages/back-pressure.adoc
new file mode 100644
index 000000000..82bc2cd93
--- /dev/null
+++ b/docs/modules/cluster-performance/pages/back-pressure.adoc
@@ -0,0 +1,74 @@
+[[back-pressure]]
+= Back Pressure
+
+Hazelcast uses operations to make remote calls. For example, a `map.get` is an operation, and
+a `map.put` is one operation for the primary
+and one operation for each of the backups; that is, `map.put` is executed for the primary and also for each backup.
+In most cases, there is a natural balance between the number of threads performing operations
+and the number of operations being executed. However, the following can disrupt this balance, causing operations
+to pile up and eventually leading to out-of-memory errors (`OOME`):
+
+* Asynchronous calls: With async calls, the system can be flooded with requests.
+* Asynchronous backups: The asynchronous backups can build up.
+
+To prevent the system from crashing, Hazelcast provides back pressure. Back pressure works by limiting the number of concurrent operation invocations and periodically making an async backup sync.
+
+== Member Side
+
+Back pressure is disabled by default and you can enable it using the following system property:
+
+`hazelcast.backpressure.enabled`
+
+To control the number of concurrent invocations, you can configure the number of invocations allowed for each partition using the
+following system property:
+
+`hazelcast.backpressure.max.concurrent.invocations.per.partition`
+
+The default value of this system property is 100. Using the default configuration, a system can
+have (271 + 1) * 100 = 27200 concurrent invocations (271 partitions + 1 for generic operations).
+
+Back pressure is only applied to normal operations. System operations, like heartbeats and repartitioning operations,
+are not influenced by back pressure. 27200 invocations might seem like a lot, but keep in mind that executing a task on `IExecutor`
+or acquiring a lock also requires an operation.
+
+If the maximum number of invocations has been reached, Hazelcast automatically applies an exponential backoff policy. This
+gives the system some time to deal with the load.
+Using the following system property, you can configure the maximum time to wait before a `HazelcastOverloadException` is thrown:
+
+`hazelcast.backpressure.backoff.timeout.millis`
+
+This system property's default value is 60000 milliseconds.
+
+The Health Monitor tracks invocation usage.
+If it sees that a member has consumed 70% or more of the available invocations, it starts to log health messages.
+
+Apart from controlling the number of invocations, you also need to control the number of pending async backups.
+This is done by periodically making these backups sync instead of async.
+This forces all pending backups to get drained. For this, Hazelcast tracks the number of
+asynchronous backups for each partition. At every **Nth** call, one synchronization is forced. This **N** is
+controlled through the following property:
+
+`hazelcast.backpressure.syncwindow`
+
+This system property's default value is 100. It means that out of every 100 *asynchronous* backups,
+Hazelcast makes one of them *synchronous*. A randomization is added,
+so the sync window with the default configuration is between 75 and 125
+invocations.
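+
+As an illustrative sketch (using the property names documented above, with the default values), these properties can also be set programmatically when starting a member:
+
+[source,java]
+----
+import com.hazelcast.config.Config;
+import com.hazelcast.core.Hazelcast;
+import com.hazelcast.core.HazelcastInstance;
+
+public class BackPressureMemberExample {
+    public static void main(String[] args) {
+        Config config = new Config();
+        // Enable back pressure; the remaining values are the documented defaults.
+        config.setProperty("hazelcast.backpressure.enabled", "true");
+        config.setProperty("hazelcast.backpressure.max.concurrent.invocations.per.partition", "100");
+        config.setProperty("hazelcast.backpressure.backoff.timeout.millis", "60000");
+        config.setProperty("hazelcast.backpressure.syncwindow", "100");
+        HazelcastInstance member = Hazelcast.newHazelcastInstance(config);
+    }
+}
+----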
+
+== Client Side
+
+To prevent the system on the client side from overloading, you can apply
+a constraint on the number of concurrent invocations.
+You can use the following system property on the client side for this purpose:
+
+`hazelcast.client.max.concurrent.invocations`
+
+This property defines the maximum allowed number of concurrent invocations.
+When it is not explicitly set, it has the value `Integer.MAX_VALUE` by default, which means infinite.
+When it is set and the number of concurrent invocations exceeds this value,
+Hazelcast throws a `HazelcastOverloadException` when a new invocation comes in.
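+
+As a minimal sketch (the limit of 1000 is an illustrative value, not a recommendation), this property can be set on the client configuration:
+
+[source,java]
+----
+import com.hazelcast.client.HazelcastClient;
+import com.hazelcast.client.config.ClientConfig;
+import com.hazelcast.core.HazelcastInstance;
+
+public class BackPressureClientExample {
+    public static void main(String[] args) {
+        ClientConfig config = new ClientConfig();
+        // Illustrative limit; by default the number of concurrent invocations is unbounded.
+        config.setProperty("hazelcast.client.max.concurrent.invocations", "1000");
+        HazelcastInstance client = HazelcastClient.newHazelcastClient(config);
+        // Beyond the limit, new invocations fail with HazelcastOverloadException.
+        client.shutdown();
+    }
+}
+----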
+
+The backoff timeout and controlling the number of
+pending async backups (sync window) are not supported on the client side.
+
+NOTE: See the xref:ROOT:system-properties.adoc[System Properties appendix] to learn how to configure the system properties.
diff --git a/docs/modules/cluster-performance/pages/best-practices.adoc b/docs/modules/cluster-performance/pages/best-practices.adoc
index 637dee2c8..02f75083a 100644
--- a/docs/modules/cluster-performance/pages/best-practices.adoc
+++ b/docs/modules/cluster-performance/pages/best-practices.adoc
@@ -1,1186 +1,12 @@
 = Best Practices
 :page-aliases: performance:data-affinity.adoc, performance:near-cache.adoc, performance:back-pressure.adoc, performance:cpu-thread-affinity.adoc, performance:best-practices.adoc, performance:pipelining.adoc, performance:slowoperationdetector.adoc, performance:threading-model.adoc
-[[basic-recs]]
-== Basic Performance Recommendations
-
-* 8 cores per Hazelcast server instance
-* Minimum of 8 GB RAM per Hazelcast member (if not using the High-Density Memory Store)
-* Dedicated NIC per Hazelcast member
-* Linux—any distribution
-* All members should run within the same subnet
-* All members should be attached to the same network switch
-
-[[homogenous]]
-=== Homogeneous Hardware Resources
-
-One of the most effective optimization strategies for Hazelcast is to
-ensure that Hazelcast services are allocated their own dedicated machine resources.
-Using dedicated, properly sized hardware (or virtual hardware) ensures that
-Hazelcast members have ample CPU, memory, and network resources without competing with other processes or services.
-
-Hazelcast distributes load evenly across all its members and assumes
-that the resources available to each of its members are homogeneous.
-In a cluster with a mix of more and less powerful machines, the weaker
-members cause bottlenecks, leaving the stronger members underutilized.
-For predictable performance, it is best to use equivalent hardware for all Hazelcast members.
-
-=== Using Single Member per Machine
-
-A Hazelcast member assumes it is alone on a machine, so we recommend not running multiple
-Hazelcast members on a machine. Having multiple
-members on a single machine most likely gives a worse performance compared to
-running a single member, since there will be more
-context switching, less batching, etc. So unless it is proven that running multiple members per machine does give a better
-performance/behavior in your particular setup, it is best to run a single member per machine.
-
-=== Using Operation Threads Efficiently
-
-By default, Hazelcast uses the machine's core count to determine the number of operation threads. Creating more
-operation threads than this core count is highly unlikely to lead to an improved performance since there will be more context
-switching, more thread notification, etc.
-
-Especially if you have a system that does simple operations like put and get,
-it is better to use a lower thread count than the number of cores.
-The reason behind the increased performance
-by reducing the core count is that the operations executed on the operation threads normally execute very fast and there can
-be a very significant amount of overhead caused by thread parking and unparking. If there are fewer threads, a thread needs
-to do more work, will block less and therefore needs to be notified less.
- -=== Avoiding Random Changes - -Tweaking can be very rewarding because significant performance improvements are possible. By default, Hazelcast tries -to behave at its best for all situations, but this doesn't always lead to the best performance. So if you know what -you are doing and what to look for, it can be very rewarding to tweak. However, it is also important that tweaking should -be done with proper testing to see if there is actually an improvement. Tweaking without proper benchmarking -is likely going to lead to confusion and could cause all kinds of problems. In case of doubt, we recommend not to tweak. - -=== Creating the Right Benchmark Environment - -When benchmarking, it is important that the benchmark reflects your production environment. Sometimes with calculated -guess, a representative smaller environment can be set up; but if you want to use the benchmark statistics to inference -how your production system is going to behave, you need to make sure that you get as close as your production setup as -possible. Otherwise, you are at risk of spotting the issues too late or focusing on the things which are not relevant. - -[[aws-depl-perf]] -== AWS Deployments - -When you deploy Hazelcast clusters on AWS EC2 instances, you can consider to place the -cluster members on the same https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#placement-groups-cluster[Cluster Placement Group]. This helps to reduce the latency among members drastically. -Additionally, you can also consider using private IPs -instead of public ones to increase the throughput when the cluster -members are placed in the same VPC. - -For the best performance of your Hazelcast on AWS EC2: - -* Select the newest Linux AMIs. -* Select the HVM based instances. -* Select at least a system with 8 vCPUs, e.g., c4.2xlarge. For an overview of all types of -EC2 instances, please check https://www.ec2instances.info[this web page^]. -* Consider setting a placement group. - -[[pipelining]] -== Pipelining - -With pipelining, you can send multiple -requests in parallel using a single thread and therefore can increase throughput. -As an example, suppose that the round trip time for a request/response -is 1 millisecond. If synchronous requests are used, e.g., `IMap.get()`, then the maximum throughput out of these requests from -a single thread is 1/001 = 1000 operations/second. One way to solve this problem is to introduce multithreading to make -the requests in parallel. For the same example, if we would use 2 threads, then the maximum throughput doubles from 1000 -operations/second, to 2000 operations/second. - -However, introducing threads for the sake of executing requests isn't always convenient and doesn't always lead to an optimal -performance; this is where the pipelining can be used. Instead of using multiple threads to have concurrent invocations, -you can use asynchronous method calls such as `IMap.getAsync()`. If you would use 2 asynchronous calls from a single thread, -then the maximum throughput is 2*(1/001) = 2000 operations/second. Therefore, to benefit from the pipelining, asynchronous calls need to -be made from a single thread. The pipelining is a convenience implementation to provide back pressure, i.e., controlling -the number of inflight operations, and it provides a convenient way to wait for all the results. 
- -[source,java] ----- -Pipelining pipelining = new Pipelining(10); -for (long k = 0; k < 100; k++) { - int key = random.nextInt(keyDomain); - pipelining.add(map.getAsync(key)); -} -// wait for completion -List results = pipelining.results(); ----- - -In the above example, we make 100 asynchronous `map.getAsync()` calls, but the maximum number of inflight calls is 10. - -By increasing the depth of the pipelining, throughput can be increased. The pipelining has its own back pressure, you do not -need to enable the <> on the client or member to have this feature on the pipelining. However, if you have many -pipelines, you may still need to enable the client/member back pressure because it is possible to overwhelm the system -with requests in that situation. See the <> to learn how to enable it on the client or member. - -You can use the pipelining both on the clients and members. You do not need a special configuration, it works out-of-the-box. - -The pipelining can be used for any asynchronous call. You can use it for IMap asynchronous get/put methods as well as for -ICache, IAtomicLong, etc. It cannot be used as a transaction mechanism though. So you cannot do some calls and throw away the pipeline and expect that -none of the requests are executed. If you want to use an atomic behavior, see xref:transactions:providing-xa-transactions.adoc[Transactions] for more details. -The pipelining is just a performance optimization, not a mechanism for atomic behavior. - -[CAUTION] -.Deprecation Notice for Transactions -==== -Transactions have been deprecated, and will be removed as of Hazelcast version 7.0. An improved version of this feature is under consideration. If you are already using transactions, get in touch and share your use case. Your feedback will help us to develop a solution that meets your needs. -==== - -The pipelines are cheap and should frequently be replaced because they accumulate results. It is fine to have a few hundred or -even a few thousand calls being processed with the pipelining. However, all the responses to all requests are stored in the pipeline -as long as the pipeline is referenced. So if you want to process a huge number of requests, then every few hundred or few -thousand calls wait for the pipelining results and just create a new instance. - -Note that the pipelines are not thread-safe. They must be used by a single thread. - -[[back-pressure]] -== Back Pressure - -Hazelcast uses operations to make remote calls. For example, a `map.get` is an operation and -a `map.put` is one operation for the primary -and one operation for each of the backups, i.e., `map.put` is executed for the primary and also for each backup. -In most cases, there is a natural balance between the number of threads performing operations -and the number of operations being executed. However, the following may pile up this balance and operations -and eventually lead to `OutofMemoryException` (`OOME`): - -* Asynchronous calls: With async calls, the system may be flooded with the requests. -* Asynchronous backups: The asynchronous backups may be piling up. - -To prevent the system from crashing, Hazelcast provides back pressure. Back pressure works by limiting the number of concurrent operation invocations and periodically making an async backup sync. 
- -=== Member Side - -Back pressure is disabled by default and you can enable it using the following system property: - -`hazelcast.backpressure.enabled` - -To control the number of concurrent invocations, you can configure the number of invocations allowed per partition using the -following system property: - -`hazelcast.backpressure.max.concurrent.invocations.per.partition` - -The default value of this system property is 100. Using a default configuration a system is allowed to -have (271 + 1) * 100 = 27200 concurrent invocations (271 partitions + 1 for generic operations). - -Back pressure is only applied to normal operations. System operations like heart beats and repartitioning operations -are not influenced by back pressure. 27200 invocations might seem like a lot, but keep in mind that executing a task on `IExecutor` -or acquiring a lock also requires an operation. - -If the maximum number of invocations has been reached, Hazelcast automatically applies an exponential backoff policy. This -gives the system some time to deal with the load. -Using the following system property, you can configure the maximum time to wait before a `HazelcastOverloadException` is thrown: - -`hazelcast.backpressure.backoff.timeout.millis` - -This system property's default value is 60000 milliseconds. - -The Health Monitor keeps an eye on the usage of the invocations. -If it sees a member has consumed 70% or more of the invocations, it starts to log health messages. - -Apart from controlling the number of invocations, you also need to control the number of pending async backups. -This is done by periodically making these backups sync instead of async. -This forces all pending backups to get drained. For this, Hazelcast tracks the number of -asynchronous backups for each partition. At every **Nth** call, one synchronization is forced. This **N** is -controlled through the following property: - -`hazelcast.backpressure.syncwindow` - -This system property's default value is 100. It means, out of 100 *asynchronous* backups, -Hazelcast makes 1 of them a *synchronous* one. A randomization is added, -so the sync window with default configuration is between 75 and 125 -invocations. - -=== Client Side - -To prevent the system on the client side from overloading, you can apply -a constraint on the number of concurrent invocations. -You can use the following system property on the client side for this purpose: - -`hazelcast.client.max.concurrent.invocations` - -This property defines the maximum allowed number of concurrent invocations. -When it is not explicitly set, it has the value `Integer.MAX_VALUE` by default, which means infinite. -When you set it and if the maximum number of concurrent invocations is exceeded this value, -Hazelcast throws `HazelcastOverloadException` when a new invocation comes in. - -Please note that back off timeout and controlling the number of -pending async backups (sync window) is not supported on the client side. - -NOTE: See the xref:ROOT:system-properties.adoc[System Properties appendix] to learn how to configure the system properties. - -[[near-cache]] -== Near Cache - -Access to small-to-medium, read-mostly data sets may be sped up -by creating a Near Cache. This cache maintains copies of distributed data -in local memory for very fast access. - -Benefits: - -* Avoids the network and deserialization costs of retrieving frequently-used data remotely -* Eventually consistent -* Can persist keys on a filesystem and reload them on restart. 
This means you can have your Near Cache ready right after application start -* Can use deserialized objects as Near Cache keys to speed up lookups - -Costs: - -* Increased memory consumption in the local JVM -* High invalidation rates may outweigh the benefits of locality of reference -* Strong consistency is not maintained; you may read stale data - -Map or Cache entries in Hazelcast are partitioned across the cluster members. -Hazelcast clients do not have local data at all. Suppose you read the key `k` a number of times from -a Hazelcast client or `k` is owned by another member in your cluster. -Then each `map.get(k)` or `cache.get(k)` will be a remote operation, which creates a lot of network trips. -If you have a data structure that is mostly read, then you should consider creating a local Near Cache, -so that reads are sped up and less network traffic is created. - -These benefits do not come for free. See the following trade-offs: - -* Members with a Near Cache has to hold the extra cached data, which increases memory consumption. -* If invalidation is enabled and entries are updated frequently, then invalidations will be costly. -* Near Cache breaks the strong consistency guarantees; you might be reading stale data. - -Near Cache is highly recommended for data structures that are mostly read. - -In a client/server system you must enable the Near Cache separately on the client, without the need -to configure it on the server. Please note that Near Cache configuration is specific to the server or client itself: -a data structure on a server may not have Near Cache configured while the same data structure on a client may have Near Cache configured. -They also can have different Near Cache configurations. - -If you are using Near Cache, you should take into account that -your hits to the keys in the Near Cache are not reflected as hits to the original keys on the primary members. -This has for example an impact on IMap's maximum idle seconds or time-to-live seconds expiration. -Therefore, even though there is a hit on a key in Near Cache, your original key on the primary member may expire. - -NOTE: Near Cache works only when you access data via `map.get(k)` or `cache.get(k)` methods. -Data returned using a predicate is not stored in the Near Cache. - -=== Hazelcast Data Structures with Near Cache Support - -The following matrix shows the Hazelcast data structures with Near Cache support. -Please have a look at the next section for a detailed explanation of `cache-local-entries`, `local-update-policy`, `preloader` and `serialize-keys`. - -|=== -| Data structure | Near Cache Support | `cache-local-entries` | `local-update-policy` | `preloader` | `serialize-keys` - -| IMap member -| yes -| yes -| no -| no -| yes - -| IMap client -| yes -| no -| no -| yes -| yes - -| JCache member -| no -| no -| no -| no -| no - -| JCache client -| yes -| no -| yes -| yes -| yes - -| ReplicatedMap member -| no -| no -| no -| no -| no - -| ReplicatedMap client -| yes -| no -| no -| no -| no - -| TransactionalMap member -| limited -| no -| no -| no -| no - -| TransactionalMap client -| no -| no -| no -| no -| no -|=== - -NOTE: Even though lite members do not store any data for Hazelcast data structures, -you can enable Near Cache on lite members for faster reads. - -=== Configuring Near Cache - -The following shows the configuration for the Hazelcast Near Cache. 
- -NOTE: Please keep in mind that, if you want to use near cache on a Hazelcast member, -configure it on the member; if you want to use it on a Hazelcast client, configure it on the client. - -**Declarative Configuration:** - -[tabs] -==== -XML:: -+ --- -[source,xml] ----- - - ... - - BINARY - true - 0 - 60 - - false - INVALIDATE - - - ... - ----- --- - -YAML:: -+ -[source,yaml] ----- -hazelcast: - near-cache: - myDataStructure: - in-memory-format: BINARY - invalidate-on-change: true - time-to-live-seconds: 0 - max-idle-seconds: 60 - eviction: - size: 1000 - max-size-policy: ENTRY_COUNT - eviction-policy: LFU - cache-local-entries: false - local-update-policy: INVALIDATE - preloader: - enabled: true - directory: nearcache-example - store-initial-delay-seconds: 0 - store-interval-seconds: 0 ----- -==== - -The element `` has an optional attribute `name` whose default value is `default`. - -**Programmatic Configuration:** - -[source,java] ----- -include::ROOT:example$/performance/ExampleNearCacheConfiguration.java[tag=nearcacheconfig] ----- - -The class https://docs.hazelcast.org/docs/{full-version}/javadoc/com/hazelcast/config/NearCacheConfig.html[NearCacheConfig^] -is used for all supported Hazelcast data structures on members and clients. - -The following are the descriptions of all configuration elements and attributes: - -* `in-memory-format`: Specifies in which format data is stored in your Near Cache. -Note that a map's in-memory format can be different from that of its Near Cache. Available values are as follows: -** `BINARY`: Data is stored in serialized binary format (default value). -** `OBJECT`: Data is stored in deserialized form. -** `NATIVE`: Data is stored in the Near Cache that uses Hazelcast's High-Density Memory Store feature. -This option is available only in Hazelcast {enterprise-product-name}. Note that a map and -its Near Cache can independently use High-Density Memory Store. -For example, while your map does not use High-Density Memory Store, its Near Cache can use it. -* `invalidate-on-change`: Specifies whether the cached entries are evicted when the entries are updated or removed. -Its default value is true. -* `time-to-live-seconds`: Maximum number of seconds for each entry to stay in the Near Cache. -Entries that are older than this period are automatically evicted from the Near Cache. -Regardless of the eviction policy used, `time-to-live-seconds` still applies. -Any integer between 0 and `Integer.MAX_VALUE`. 0 means infinite. Its default value is 0. -* `max-idle-seconds`: Maximum number of seconds each entry can stay in the Near Cache as untouched (not read). -Entries that are not read more than this period are removed from the Near Cache. -Any integer between 0 and `Integer.MAX_VALUE`. 0 means `Integer.MAX_VALUE`. Its default value is 0. -* `eviction`: Specifies the eviction behavior when you use High-Density Memory Store for your Near Cache. -It has the following attributes: -** `eviction-policy`: Eviction policy configuration. Available values are as follows: -*** `LRU`: Least Recently Used (default value). -*** `LFU`: Least Frequently Used. -*** `NONE`: No items are evicted and the property `max-size` is ignored. -You still can combine it with `time-to-live-seconds` and `max-idle-seconds` to evict items from the Near Cache. -*** `RANDOM`: A random item is evicted. -** `max-size-policy`: Maximum size policy for eviction of the Near Cache. 
Available values are as follows: -*** `ENTRY_COUNT`: Maximum size based on the entry count in the Near Cache (default value). -*** `USED_NATIVE_MEMORY_SIZE`: Maximum used native memory size of the specified Near Cache in MB to trigger the eviction. -If the used native memory size exceeds this threshold, the eviction is triggered. -Available only for `NATIVE` in-memory format. This is supported only by Hazelcast {enterprise-product-name}. -*** `USED_NATIVE_MEMORY_PERCENTAGE`: Maximum used native memory percentage of the specified Near Cache to trigger the eviction. -If the native memory usage percentage (relative to maximum native memory size) exceeds this threshold, the eviction is triggered. -Available only for `NATIVE` in-memory format. This is supported only by Hazelcast {enterprise-product-name}. -*** `FREE_NATIVE_MEMORY_SIZE`: Minimum free native memory size of the specified Near Cache in MB to trigger the eviction. -If free native memory size goes below this threshold, eviction is triggered. Available only for `NATIVE` in-memory format. -This is supported only by Hazelcast {enterprise-product-name}. -*** `FREE_NATIVE_MEMORY_PERCENTAGE`: Minimum free native memory percentage of the specified Near Cache to trigger eviction. -If free native memory percentage (relative to maximum native memory size) goes below this threshold, eviction is triggered. -Available only for `NATIVE` in-memory format. This is supported only by Hazelcast {enterprise-product-name}. -** `size`: Maximum size of the Near Cache used for `max-size-policy`. When this is reached the Near Cache is evicted based on -the policy defined. Any integer between `1` and `Integer.MAX_VALUE`. This value has different defaults, depending on the data structure. -*** `IMap`: Its default value is `Integer.MAX_VALUE` for on-heap maps and `10000` for the `NATIVE` in-memory format. -*** `JCache`: Its default value is `10000`. -* `cache-local-entries`: Specifies whether the local entries are cached. It can be useful when in-memory format for -Near Cache is different from that of the map. By default, it is disabled. -Is just available on Hazelcast members, not on Hazelcast clients (which have no local entries). -* `local-update-policy`: Specifies the update policy of the local Near Cache. -It is available on JCache clients. Available values are as follows: -** `INVALIDATE`: Removes the Near Cache entry on mutation. After the mutative call to the member completes but before the operation returns to the caller, -the Near Cache entry is removed. Until the mutative operation completes, the readers still continue to read the old value. -But as soon as the update completes the Near Cache entry is removed. -Any threads reading the key after this point will have a Near Cache miss and call through to the member, obtaining the new entry. -This setting provides read-your-writes consistency. This is the default setting. -** `CACHE_ON_UPDATE`: Updates the Near Cache entry on mutation. After the mutative call to the member completes but before the put returns to the caller, -the Near Cache entry is updated. -So a remove will remove it and one of the put methods will update it to the new value. -Until the update/remove operation completes, the entry's old value can still be read from the Near Cache. -But before the call completes the Near Cache entry is updated. Any threads reading the key after this point read the new entry. 
-If the mutative operation was a remove, the key will no longer exist in the cache, both the Near Cache and the original copy in the member. -The member initiates an invalidate event to any other Near Caches, however the caller Near Cache is -not invalidated as it already has the new value. This setting also provides read-your-writes consistency. -* `preloader`: Specifies if the Near Cache should store and preload its keys for a faster re-population after -a Hazelcast client restart. Is just available on IMap and JCache clients. It has the following attributes: -** `enabled`: Specifies whether the preloader for this Near Cache is enabled or not, `true` or `false`. -** `directory`: Specifies the parent directory for the preloader of this Near Cache. The filenames for -the preloader storage are generated from the Near Cache name. You can additionally specify the parent directory -to have multiple clients on the same machine with the same Near Cache names. -** `store-initial-delay-seconds`: Specifies the delay in seconds until the keys of this Near Cache -are stored for the first time. Its default value is `600` seconds. -** `store-interval-seconds`: Specifies the interval in seconds in which the keys of this Near Cache are stored. -Its default value is `600` seconds. - -=== Near Cache Configuration Examples - -This section shows some configuration examples for different Hazelcast data structures. - -==== Near Cache Example for IMap - -The following are configuration examples for IMap Near Caches for Hazelcast members and clients. - -[tabs] -==== -XML:: -+ --- -[source,xml] ----- - - ... - - BINARY - - OBJECT - false - 600 - - true - - - ... - ----- --- - -YAML:: -+ --- -[source,yaml] ----- -hazelcast: - map: - mostlyReadMap: - in-memory-format: BINARY - near-cache: - in-memory-format: OBJECT - invalidate-on-change: false - time-to-live-seconds: 600 - eviction: - eviction-policy: NONE - max-size-policy: ENTRY_COUNT - size: 5000 - cache-local-entries: true ----- --- - -Java:: -+ -[source,java] ----- -EvictionConfig evictionConfig = new EvictionConfig() - .setEvictionPolicy(EvictionPolicy.NONE) - .setMaximumSizePolicy(MaxSizePolicy.ENTRY_COUNT) - .setSize(5000); - -NearCacheConfig nearCacheConfig = new NearCacheConfig() - .setInMemoryFormat(InMemoryFormat.OBJECT) - .setInvalidateOnChange(false) - .setTimeToLiveSeconds(600) - .setEvictionConfig(evictionConfig); - -Config config = new Config(); -config.getMapConfig("mostlyReadMap") - .setInMemoryFormat(InMemoryFormat.BINARY) - .setNearCacheConfig(nearCacheConfig); ----- -==== - -The Near Cache configuration for maps on members is a child of the map configuration, -so you do not have to define the map name in the Near Cache configuration. - -[tabs] -==== -XML:: -+ --- -[source,xml] ----- - - ... - - OBJECT - true - - - ... 
- ----- --- - -YAML:: -+ --- -[source,yaml] ----- -hazelcast-client: - near-cache: - mostlyReadMap: - in-memory-format: OBJECT - invalidate-on-change: true - eviction: - eviction-policy: LRU - max-size-policy: ENTRY_COUNT - size: 50000 ----- --- - -Java:: -+ -[source,java] ----- -EvictionConfig evictionConfig = new EvictionConfig() - .setEvictionPolicy(EvictionPolicy.LRU) - .setMaximumSizePolicy(MaxSizePolicy.ENTRY_COUNT) - .setSize(50000); - -NearCacheConfig nearCacheConfig = new NearCacheConfig() - .setName("mostlyReadMap") - .setInMemoryFormat(InMemoryFormat.OBJECT) - .setInvalidateOnChange(true) - .setEvictionConfig(evictionConfig); - -ClientConfig clientConfig = new ClientConfig() - .addNearCacheConfig(nearCacheConfig); ----- -==== - -The Near Cache on the client side must have the same name as the data structure on the member for which -this Near Cache is being created. You can use wildcards, so in this example `mostlyRead*` would also match the map `mostlyReadMap`. - -A Near Cache can have its own `in-memory-format` which is independent of the `in-memory-format` of the data structure. - -==== Near Cache Example for JCache Clients - -The following is a configuration example for a JCache Near Cache for a Hazelcast client. - -[tabs] -==== -XML:: -+ --- -[source,xml] ----- - - ... - - OBJECT - true - - CACHE_ON_UPDATE - - ... - ----- --- - -YAML:: -+ --- -[source,yaml] ----- -hazelcast-client: - near-cache: - mostlyReadCache: - in-memory-format: OBJECT - invalidate-on-change: true - eviction: - eviction-policy: LRU - max-size-policy: ENTRY_COUNT - size: 30000 - local-update-policy: CACHE_ON_UPDATE ----- --- - -Java:: -+ -[source,java] ----- -EvictionConfig evictionConfig = new EvictionConfig() - .setEvictionPolicy(EvictionPolicy.LRU) - .setMaximumSizePolicy(MaxSizePolicy.ENTRY_COUNT) - .setSize(30000); - -NearCacheConfig nearCacheConfig = new NearCacheConfig() - .setName("mostlyReadCache") - .setInMemoryFormat(InMemoryFormat.OBJECT) - .setInvalidateOnChange(true) - .setEvictionConfig(evictionConfig) - .setLocalUpdatePolicy(LocalUpdatePolicy.CACHE_ON_UPDATE); - -ClientConfig clientConfig = new ClientConfig() - .addNearCacheConfig(nearCacheConfig); ----- -==== - -==== Example for Near Cache with High-Density Memory Store - -[navy]*Hazelcast {enterprise-product-name} Feature* - -The following is a configuration example for an IMap High-Density Near Cache for a Hazelcast member. - -[tabs] -==== -XML:: -+ --- -[source,xml] ----- - - ... - - OBJECT - - NATIVE - - - - ... - ----- --- - -YAML:: -+ --- -[source,yaml] ----- -hazelcast: - map: - mostlyReadMapWithHighDensityNearCache - in-memory-format: OBJECT - near-cache: - in-memory-format: NATIVE - eviction: - eviction-policy: LFU - max-size-policy: USED_NATIVE_MEMORY_PERCENTAGE - size: 90 ----- --- - -Java:: -+ -[source,java] ----- -EvictionConfig evictionConfig = new EvictionConfig() - .setEvictionPolicy(EvictionPolicy.LFU) - .setMaximumSizePolicy(MaxSizePolicy.USED_NATIVE_MEMORY_PERCENTAGE) - .setSize(90); - -NearCacheConfig nearCacheConfig = new NearCacheConfig() - .setInMemoryFormat(InMemoryFormat.NATIVE) - .setEvictionConfig(evictionConfig); - -Config config = new Config(); -config.getMapConfig("mostlyReadMapWithHighDensityNearCache") - .setInMemoryFormat(InMemoryFormat.OBJECT) - .setNearCacheConfig(nearCacheConfig); ----- -==== - -Keep in mind that you should have already enabled the High-Density Memory Store usage for your cluster. 
-See the xref:storage:high-density-memory.adoc#configuring-high-density-memory-store[Configuring High-Density Memory Store section]. - -Note that a map and its Near Cache can independently use High-Density Memory Store. -For example, if your map does not use High-Density Memory Store, its Near Cache can still use it. - -=== Near Cache Eviction - -In the scope of Near Cache, eviction means evicting (clearing) the entries selected according to -the given `eviction-policy` when the specified `max-size-policy` has been reached. - -The `max-size-policy` defines the state when the Near Cache is full and determines whether -the eviction should be triggered. The `size` is either interpreted as entry count, memory size or percentage, depending on the chosen policy. - -Once the eviction is triggered the configured `eviction-policy` determines which, if any, entries must be evicted. - -Note that the policies mentioned are configured under the `near-cache` configuration block, as seen in the above -<>. - -=== Near Cache Expiration - -Expiration means the eviction of expired records. A record is expired: - -* if it is not touched (accessed/read) for `max-idle-seconds` -* `time-to-live-seconds` passed since it is put to Near Cache. - -The actual expiration is performed in the following cases: - -* When a record is accessed: it is checked if the record is expired or not. -If it is expired, it is evicted and `null` is returned as the value to the caller. -* In the background: there is an expiration task that periodically (currently 5 seconds) scans records and evicts the expired records. - -Note that `max-idle-seconds` and `time-to-live-seconds` are configured under the `near-cache` configuration block, as seen in the above -<>. - -=== Near Cache Invalidation - -Invalidation is the process of removing an entry from the Near Cache when its value is updated or -it is removed from the original data structure (to prevent stale reads). -Near Cache invalidation happens asynchronously at the cluster level, but synchronously at the current member. -This means that the Near Cache is invalidated within the whole cluster after the modifying operation is finished, -but updated from the current member before the modifying operation is done. -A modifying operation can be an EntryProcessor, an explicit update or remove as well as an expiration or eviction. -Generally, whenever the state of an entry changes in the record store by updating its value or removing it, the invalidation event is sent for that entry. - -Invalidations can be sent from members to client Near Caches or to member Near Caches, either individually or in batches. -Default behavior is sending in batches. If there are lots of mutating operations such as put/remove on data structures, -it is advised that you configure batch invalidations. -This reduces the network traffic and keeps the eventing system less busy, but may increase the delay of individual invalidations. - -You can use the following system properties to configure the Near Cache invalidation: - -* `hazelcast.map.invalidation.batch.enabled`: Enable or disable batching. -Its default value is `true`. When it is set to `false`, all invalidations are sent immediately. -* `hazelcast.map.invalidation.batch.size`: Maximum number of invalidations in a batch. Its default value is `100`. -* `hazelcast.map.invalidation.batchfrequency.seconds`: If the collected invalidations do not reach the configured batch size, -a background process sends them periodically. Its default value is `10` seconds. 
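-
-These properties can also be set programmatically; a minimal sketch (the values are illustrative, not recommendations):
-
-[source,java]
-----
-Config config = new Config();
-// keep batching enabled and send larger, less frequent batches
-config.setProperty("hazelcast.map.invalidation.batch.enabled", "true");
-config.setProperty("hazelcast.map.invalidation.batch.size", "500");
-config.setProperty("hazelcast.map.invalidation.batchfrequency.seconds", "5");
-HazelcastInstance hz = Hazelcast.newHazelcastInstance(config);
-----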
- -If there are a lot of clients or many mutating operations, batching should remain enabled and -the batch size should be configured with the `hazelcast.map.invalidation.batch.size` system property to a suitable value. - -=== Near Cache Consistency - -==== Eventual Consistency - -Near Caches are invalidated by invalidation events. Invalidation events can be lost due to the fire-and-forget fashion of the eventing system. -If an event is lost, reads from Near Cache can indefinitely be stale. - -To solve this problem, Hazelcast provides -eventually consistent behavior for IMap/JCache Near Caches by detecting invalidation losses. -After detection of an invalidation loss, stale data is made unreachable and Near Cache's `get` calls to -that data are directed to the underlying IMap/JCache to fetch the fresh data. - -You can configure eventual consistency with the system properties below (same properties are valid for both member and client side Near Caches): - -* `hazelcast.invalidation.max.tolerated.miss.count`: Default value is 10. -If missed invalidation count is bigger than this value, relevant cached data is made unreachable. -* `hazelcast.invalidation.reconciliation.interval.seconds`: Default value is 60 seconds. -This is a periodic task that scans cluster members periodically to compare generated invalidation events with the received ones from Near Cache. - -==== Locally Initiated Changes - -For local invalidations, when a record is updated/removed, future reads will see this -update/remove to provide read-your-writes consistency. To achieve this consistency, Near Cache configuration provides the following update policies: - -* `INVALIDATE` -* `CACHE_ON_UPDATE` - -If you choose `INVALIDATE`, the entry is removed from the Near Cache after the update/remove occurs in -the underlying data structure and before the operation (get) returns to the caller. -Until the update/remove operation completes, the entry's old value can still be read from the Near Cache. - -If you choose `CACHE_ON_UPDATE`, the entry is updated after the -update/remove occurs in the underlying data structure and before the operation (put/get) returns to the caller. -If it is an update operation, it removes the entry and the new value is placed. -Until the update/remove operation completes, the entry's old value can still be read from the Near Cache. -Any threads reading the key after this point read the new entry. If the mutative operation was a remove, -the key will no longer exist in the Near Cache and the original copy in the member. - -=== Near Cache Preloader - -The Near Cache preloader is a functionality to store the keys from a Near Cache to provide -a fast re-population of the previous hot data set after a Hazelcast Client has been restarted. -It is available on IMap and JCache clients. - -The Near Cache preloader stores the keys (not the values) of Near Cache entries in regular intervals. -You can define the initial delay via `store-initial-delay-seconds`, e.g., if you know that your hot data set needs -some time to build up. You can configure the interval via `store-interval-seconds` which determines how often -the key-set is stored. The persistence does not run continuously. Whenever the storage is scheduled, it is performed on the actual keys in the Near Cache. - -The Near Cache preloader is triggered on the first initialization of the data structure on the client, e.g., `client.getMap("myNearCacheMap")`. -This schedules the preloader, which works in the background, so your application is not blocked. 
-The storage is enabled after the loading is completed.
-
-The configuration parameter `directory` is optional.
-If you omit it, the base directory becomes the user working directory (normally where the JVM was started, or
-as configured with the system property `user.dir`).
-The storage filenames are always created from the Near Cache name.
-So even if you use a wildcard name in the Near Cache configuration, the preloader filenames are unique.
-
-NOTE: If you run multiple Hazelcast clients with the Near Cache preloader enabled on the same machine,
-you have to configure a unique storage filename for each client or run the clients from different user directories.
-If two clients write to the same file, only the first client succeeds.
-Subsequent clients throw an exception as soon as the Near Cache preloader is triggered.
-
-== CPU Thread Affinity
-
-Hazelcast lets you configure CPU thread affinity, where certain threads can have affinity
-for particular CPUs, giving you much better control over latency and better throughput.
-
-The following affinity configurations are available for a member:
-
-```
--Dhazelcast.io.input.thread.affinity=1-3
--Dhazelcast.io.output.thread.affinity=3-5
--Dhazelcast.operation.thread.affinity=7-10,13
--Dhazelcast.operation.response.thread.affinity=15,16
-```
-
-The following affinity configurations are available for a client:
-
-```
--Dhazelcast.client.io.input.thread.affinity=1-4
--Dhazelcast.client.io.output.thread.affinity=5-8
--Dhazelcast.client.response.thread.affinity=7-9
-```
-
-You can set the CPU thread affinity properties shown above only on the command line.
-
-The values for the above configuration properties are defined as follows:
-
-* **Individual CPUs**, e.g., `1,2,3`: This means there are going to be
-three threads. The first thread runs on CPU 1, the second thread on CPU 2, and so on.
-* **CPU ranges**, e.g., `1-3`: Shortcut syntax for `1,2,3`.
-* **Group**, e.g., `[1-3]`: This configures three threads and each of
-these threads can run on CPU 1, 2 and 3.
-* **Group with thread count**, e.g., `[1-3]:2`: This configures two
-threads and each of these two threads can run on CPU 1, 2 and 3.
-
-You can also combine these, e.g., `1,2,[5-7],[10,12,16]:2`.
-
-Note that the syntax for CPU thread affinity shown above not only determines
-the mapping of CPUs to threads, but also the thread count.
-If you use CPU thread affinity, e.g., `hazelcast.io.input.thread.affinity`,
-then `hazelcast.io.input.thread.count` is ignored. See the <<threading-model,Threading Model section>> for more
-information about specifying explicit thread counts.
-
-If you don't configure affinity for a category of threads, they can run on any CPU.
-
-Let's look at an example.
Assuming you have the `numactl` utility, run
-the following command on your machine to see the mapping between the NUMA
-nodes and threads:
-
-```
-numactl --hardware
-```
-
-An example output is shown below:
-
-```
-available: 2 nodes (0-1)
-node 0 cpus: 0 1 2 3 4 5 6 7 8 9 20 21 22 23 24 25 26 27 28 29
-node 0 size: 393090 MB
-node 0 free: 372729 MB
-node 1 cpus: 10 11 12 13 14 15 16 17 18 19 30 31 32 33 34 35 36 37 38 39
-node 1 size: 393216 MB
-node 1 free: 343296 MB
-node distances:
-node   0   1
-  0:  10  21
-  1:  21  10
-```
-
-If you want to configure 20 threads on NUMA node 0 and 20 threads on NUMA node 1,
-and confine the threads to these NUMA nodes, you can use the following configuration:
-
-```
--Dhazelcast.operation.thread.affinity=[0-9,20-29],[10-19,30-39]
-```
-
-See https://en.wikipedia.org/wiki/Non-uniform_memory_access[here^]
-for more information about NUMA nodes.
-
-[[threading-model]]
-== Threading Model
-
-Your application server has its own threads. Hazelcast does not use these; it manages its own threads.
-
-NOTE: For information on threading in Thread-Per-Core (TPC) environments, see xref:cluster-performance:thread-per-core-tpc.adoc[].
-
-=== I/O Threading
-
-Hazelcast uses a pool of threads for I/O. A single thread does not perform all the I/O.
-Instead, multiple threads perform the I/O. On each cluster member, the I/O threading is split into three types of I/O threads:
-
-* an I/O thread that handles accept requests
-* I/O threads that read data from other members/clients
-* I/O threads that write data to other members/clients
-
-You can configure the number of I/O threads using the `hazelcast.io.thread.count` system property.
-Its default value is 3 per member. If 3 is used, in total there are 7 I/O threads:
-1 accept I/O thread, 3 read I/O threads and 3 write I/O threads. Each I/O thread has
-its own Selector instance and waits on the `Selector.select` if there is nothing to do.
-
-NOTE: You can also specify counts for input and output threads separately.
-There are `hazelcast.io.input.thread.count` and `hazelcast.io.output.thread.count` properties for this purpose.
-See the xref:ROOT:system-properties.adoc[System Properties appendix] for information about these properties and how to set them.
-
-Hazelcast periodically scans the utilization of each I/O thread and
-can decide to migrate a connection to a new thread if
-the existing thread is servicing a disproportionate number of I/O events.
-You can customize the scanning interval by configuring the `hazelcast.io.balancer.interval.seconds` system property;
-its default interval is 20 seconds. You can disable the balancing process by setting this property to a negative value.
-
-When a read I/O thread has received sufficient bytes for a packet, it creates a `Packet` object. This `Packet` object is
-then sent to the system where it is de-multiplexed. If the `Packet` header signals that it is an operation/response, the `Packet` is handed
-over to the operation service (see the <<operation-threading,Operation Threading section>>). If the `Packet` is an event, it is handed
-over to the event service (see the <<event-threading,Event Threading section>>).
-
-[[event-threading]]
-=== Event Threading
-
-Hazelcast uses a shared event system to deal with components that rely on events, such as topic, collections, listeners and Near Cache.
-
-Each cluster member has an array of event threads and each thread has its own work queue. When an event is produced,
-either locally or remotely, an event thread is selected (depending on whether there is message ordering) and the event is placed
-in the work queue for that event thread.
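-
-Because these event threads are shared, a slow listener can delay events for unrelated components; as the following paragraphs explain, it is better to offload heavy listener work. A minimal sketch (the topic name, pool size and `process()` method are illustrative):
-
-[source,java]
-----
-ITopic<String> topic = hazelcastInstance.getTopic("orders");
-ExecutorService appExecutor = Executors.newFixedThreadPool(4);
-
-topic.addMessageListener(message ->
-        // return immediately so the shared event thread is freed;
-        // the heavy work runs on the application's own pool instead
-        appExecutor.submit(() -> process(message.getMessageObject())));
-----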
-
-You can set the following properties
-to alter the system's behavior:
-
-* `hazelcast.event.thread.count`: Number of event threads in this array. Its default value is 5.
-* `hazelcast.event.queue.capacity`: Capacity of the work queue. Its default value is 1000000.
-* `hazelcast.event.queue.timeout.millis`: Timeout for placing an item on the work queue in milliseconds. Its default value is 250 milliseconds.
-
-If you process a lot of events and have many cores, setting the `hazelcast.event.thread.count` property to
-a higher value is a good practice. This way, more events can be processed in parallel.
-
-Multiple components share the same event queues. If there are two topics, say A and B, they may share the same queue(s)
-for certain messages and hence the same event thread. If there are a lot of pending messages produced by A, then B needs to wait.
-Also, if processing a message from A takes a lot of time on the event thread, B suffers from this.
-That is why it is better to offload processing to a dedicated thread (pool) so that systems are better isolated.
-
-If the events are produced at a higher rate than they are consumed, the queue grows in size. To prevent overloading the system
-and running into an `OutOfMemoryException`, the queue is given a capacity of 1 million items. When the maximum capacity is reached, the items are
-dropped. This means that the event system is a 'best effort' system. There is no guarantee that you are going to get an
-event. Topic A might have a lot of pending messages and therefore B cannot receive messages because the queue
-has no capacity and messages for B are dropped.
-
-=== IExecutor Threading
-
-Executor threading is straightforward. When a task is submitted for execution on executor E, the work is placed
-in the work queue of E's own `ThreadPoolExecutor` instance.
-Thus, executors are fully isolated, but they still share the same underlying hardware, most importantly the CPUs.
-
-You can configure the IExecutor using the `ExecutorConfig` (programmatic configuration) or
-using `<executor-service>` (declarative configuration). See also the xref:computing:executor-service.adoc#configuring-executor-service[Configuring Executor Service section].
-
-[[operation-threading]]
-=== Operation Threading
-
-The following are the operation types:
-
-* operations that are aware of a certain partition, e.g., `IMap.get(key)`
-* operations that are not partition aware, e.g., `IExecutorService.executeOnMember(command, member)`
-
-Each of these operation types has a different threading model, explained in the following sections.
-
-==== Partition-aware Operations
-
-To execute partition-aware operations, an array of operation threads is created.
-The size of this array defaults to the number of cores, with a minimum value of 2.
-This value can be changed using the `hazelcast.operation.thread.count` property.
-
-Each operation thread has its own work queue and it consumes messages from this work queue. If a partition-aware
-operation needs to be scheduled, the right thread is found using the formula below.
-
-`threadIndex = partitionId % partition thread-count`
-
-After the `threadIndex` is determined, the operation is put in the work queue of that operation thread. This means the following:
-
-* A single operation thread executes operations for multiple partitions;
-if there are 271 partitions and 10 partition threads, then each operation thread executes operations for roughly 27 partitions.
-* Each partition belongs to exactly one operation thread.
-All operations for a partition are always handled by exactly the same operation thread.
-* Concurrency control is not needed to deal with partition-aware operations because
-once a partition-aware operation is put in the work queue of a partition-aware operation thread, only one thread is able to touch that partition.
-
-Because of this threading strategy, there are two forms of false sharing you need to be aware of:
-
-* False sharing of the partition - two completely independent data structures share the same partition.
-For example, if there is a map `employees` and a map `orders`,
-the method `employees.get("peter")` running on partition 25 may be blocked by
-the method `orders.get(1234)` also running on partition 25.
-If independent data structures share the same partition, a slow operation on one data structure can slow down the other data structures.
-* False sharing of the partition-aware operation thread - each operation thread is responsible for executing
- operations on a number of partitions. For example, *thread 1* could be responsible for partitions 0, 10, 20, etc. and *thread 2* could be responsible for partitions
- 1, 11, 21, etc. If an operation for partition 1 takes a lot of time, it blocks the execution of an operation for partition
- 11 because both of them are mapped to the same operation thread.
-
-You need to be careful with long-running operations because they can starve other operations mapped to the same thread.
-As a general rule, the partition thread should be released as soon as possible because operations are not designed
-to be long-running. That is why, for example, it is very dangerous to execute a long-running operation
-using `AtomicReference.alter()` or `IMap.executeOnKey()`, because these operations block other operations from being executed.
-
-Currently, there is no support for work stealing. Different partitions that map to the same thread may need to wait
-until one of the partitions is finished, even though there are other free partition-aware operation threads available.
-
-**Example:**
-
-Take a cluster with three members. Two members have 90 primary partitions and one member has 91 primary partitions. Let's
-say you have one CPU and four cores per CPU. By default, four operation threads will be allocated to serve 90 or 91 partitions.
-
-==== Non-Partition-aware Operations
-
-To execute operations that are not partition-aware, e.g., `IExecutorService.executeOnMember(command, member)`, generic operation
-threads are used. When the Hazelcast instance is started, an array of operation threads is created. The size of this array
-has a default value of the number of cores divided by two, with a minimum value of 2. It can be changed using the
-`hazelcast.operation.generic.thread.count` property.
-
-A non-partition-aware operation thread does not execute an operation for a specific partition. Only partition-aware
- operation threads execute partition-aware operations.
-
-Unlike the partition-aware operation threads, all the generic operation threads share the same work queue: `genericWorkQueue`.
-
-If a non-partition-aware operation needs to be executed, it is placed in that work queue and any generic operation
-thread can execute it. The big advantage is that you automatically have work balancing, since any generic operation
-thread is allowed to pick up work from this queue.
-
-The disadvantage is that this shared queue can be a point of contention.
You may not see this contention in -production since performance is dominated by I/O and the system does not run many non-partition-aware operations. - -==== Priority Operations - -In some cases, the system needs to run operations with a higher priority, e.g., an important system operation. -To support priority operations, Hazelcast has the following features: - -* For partition-aware operations: Each partition thread has its own work queue and it also has a priority - work queue. The partition thread always checks the priority queue before it processes work from its normal work queue. -* For non-partition-aware operations: Next to the `genericWorkQueue`, there is also a `genericPriorityWorkQueue`. When a priority operation - needs to be run, it is put in the `genericPriorityWorkQueue`. Like the partition-aware operation threads, a generic - operation thread first checks the `genericPriorityWorkQueue` for work. - -Since a worker thread blocks on the normal work queue (either partition specific or generic), a priority operation -may not be picked up because it is not put in the queue where it is blocking. Hazelcast always sends a 'kick the worker' operation that -only triggers the worker to wake up and check the priority queue. - -==== Operation-response and Invocation-future - -When an Operation is invoked, a `Future` is returned. See the example code below. - -[source,java] ----- -GetOperation operation = new GetOperation( mapName, key ); -Future future = operationService.invoke( operation ); -future.get(); ----- - -The calling side blocks for a reply. In this case, `GetOperation` is set in the work queue for the partition of `key`, where -it eventually is executed. Upon execution, a response is returned and placed on the `genericWorkQueue` where it is executed by a -"generic operation thread". This thread signals the `future` and notifies the blocked thread that a response is available. -Hazelcast has a plan of exposing this `future` to the outside world, and we will provide the ability to register a completion listener so you can perform asynchronous calls. - -==== Local Calls - -When a local partition-aware call is done, an operation is made and handed over to the work queue of the correct partition operation thread, -and a `future` is returned. When the calling thread calls `get` on that `future`, it acquires a lock and waits for the result -to become available. When a response is calculated, the `future` is looked up and the waiting thread is notified. - -In the future, this will be optimized to reduce the amount of expensive systems calls, such as `lock.acquire()`/`notify()` and the expensive -interaction with the operation-queue. Probably, we will add support for a caller-runs mode, so that an operation is directly run on -the calling thread. - -== SlowOperationDetector - -The `SlowOperationDetector` monitors the operation threads and collects information about all slow operations. -An `Operation` is a task executed by a generic or partition thread (see xref:performance:threading-model.adoc#operation-threading[Operation Threading]). -An operation is considered as slow when it takes more computation time than the configured threshold. - -The `SlowOperationDetector` stores the fully qualified classname of the operation and its stacktrace as well as -operation details, start time and duration of each slow invocation. All collected data is available in -the xref:{page-latest-supported-mc}@management-center:monitor-imdg:monitor-members.adoc[Management Center]. 
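-
-For illustration, an operation like the following deliberately slow entry processor would be recorded once it exceeds the configured threshold (a contrived sketch; the map name, key and sleep time are illustrative):
-
-[source,java]
-----
-IMap<String, String> map = hazelcastInstance.getMap("orders");
-map.executeOnKey("hot-key", entry -> {
-    try {
-        Thread.sleep(5_000); // blocks the partition thread; flagged as a slow operation
-    } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-    }
-    return null;
-});
-----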
- -The `SlowOperationDetector` is configured via the following system properties. - -* `hazelcast.slow.operation.detector.enabled` -* `hazelcast.slow.operation.detector.log.purge.interval.seconds` -* `hazelcast.slow.operation.detector.log.retention.seconds` -* `hazelcast.slow.operation.detector.stacktrace.logging.enabled` -* `hazelcast.slow.operation.detector.threshold.millis` - -See the xref:ROOT:system-properties.adoc[System Properties appendix] for explanations of these properties. - -=== Logging of Slow Operations - -The detected slow operations are logged as warnings in the Hazelcast log files: - -``` -WARN 2015-05-07 11:05:30,890 SlowOperationDetector: [127.0.0.1]:5701 - Slow operation detected: com.hazelcast.map.impl.operation.PutOperation - Hint: You can enable the logging of stacktraces with the following config - property: hazelcast.slow.operation.detector.stacktrace.logging.enabled -WARN 2015-05-07 11:05:30,891 SlowOperationDetector: [127.0.0.1]:5701 - Slow operation detected: com.hazelcast.map.impl.operation.PutOperation - (2 invocations) -WARN 2015-05-07 11:05:30,892 SlowOperationDetector: [127.0.0.1]:5701 - Slow operation detected: com.hazelcast.map.impl.operation.PutOperation - (3 invocations) -``` - -Stacktraces are always reported to the Management Center, but by default they are not printed to keep the log size small. -If logging of stacktraces is enabled, the full stacktrace is printed every 100 invocations. -All other invocations print a shortened version. - -=== Purging of Slow Operation Logs - -Since a Hazelcast cluster can run for a very long time, Hazelcast purges the slow operation logs periodically to prevent an OOME. -You can configure the purge interval and the retention time for each invocation. - -The purging removes each invocation whose retention time is exceeded. -When all invocations are purged from a slow operation log, the log is deleted. - - +Learn more about best practices and Hazelcast recommendations: + +* xref:capacity-planning.adoc[] +* xref:cluster-performance:performance-tips.adoc[] +* xref:cluster-performance:back-pressure.adoc[] +* xref:cluster-performance:pipelining.adoc[] +* xref:cluster-performance:aws-deployments.adoc[] +* xref:cluster-performance:threading.adoc[] +* xref:cluster-performance:near-cache.adoc[] \ No newline at end of file diff --git a/docs/modules/cluster-performance/pages/near-cache.adoc b/docs/modules/cluster-performance/pages/near-cache.adoc new file mode 100644 index 000000000..b73d49570 --- /dev/null +++ b/docs/modules/cluster-performance/pages/near-cache.adoc @@ -0,0 +1,655 @@ +[[near-cache]] += Near Cache + +Access to small-to-medium, read-mostly data sets may be sped up +by creating a Near Cache. This cache maintains copies of distributed data +in local memory for very fast access. + +Benefits: + +* Avoids the network and deserialization costs of retrieving frequently-used data remotely +* Eventually consistent. +* Can persist keys on a filesystem and reload them on restart. This means you can have your Near Cache ready right after application start. +* Can use deserialized objects as Near Cache keys to speed up lookups. + +Costs: + +* Increased memory consumption in the local JVM. +* High invalidation rates can outweigh the benefits of locality of reference. +* Strong consistency is not maintained; you might read stale data. + +Map or Cache entries in Hazelcast are partitioned across the cluster members. +Hazelcast clients do not have local data at all. 
Suppose you read the key `k` a number of times from
+a Hazelcast client, or `k` is owned by another member in your cluster.
+Then each `map.get(k)` or `cache.get(k)` will be a remote operation, which creates a lot of network trips.
+If you have a data structure that is mostly read, consider creating a local Near Cache
+so that reads are sped up and less network traffic is created.
+
+These benefits do not come for free. See the following trade-offs:
+
+* Members with a Near Cache must hold the extra cached data, which increases memory consumption.
+* If invalidation is enabled and entries are updated frequently, then invalidations will be costly.
+* Near Cache breaks the strong consistency guarantees; you might be reading stale data.
+
+Near Cache is highly recommended for data structures that are mostly read.
+
+In a client/server system, you must enable the Near Cache separately on the client; there is no need
+to configure it on the server. Note that Near Cache configuration is specific to the server or client itself:
+a data structure on a server may not have Near Cache configured while the same data structure on a client may have it configured.
+They can also have different Near Cache configurations.
+
+If you are using Near Cache, take into account that
+your hits to the keys in the Near Cache are not reflected as hits to the original keys on the primary members.
+This has an impact on, for example, IMap's maximum idle seconds or time-to-live seconds expiration.
+Therefore, even though there is a hit on a key in Near Cache, your original key on the primary member may expire.
+
+NOTE: Near Cache works only when you access data using the `map.get(k)` or `cache.get(k)` methods.
+Data returned using a predicate is not stored in the Near Cache.
+
+== Hazelcast Data Structures with Near Cache Support
+
+The following matrix shows the Hazelcast data structures with Near Cache support.
+The next section provides a detailed explanation of `cache-local-entries`, `local-update-policy`, `preloader` and `serialize-keys`.
+
+|===
+| Data structure | Near Cache Support | `cache-local-entries` | `local-update-policy` | `preloader` | `serialize-keys`
+
+| IMap member
+| yes
+| yes
+| no
+| no
+| yes
+
+| IMap client
+| yes
+| no
+| no
+| yes
+| yes
+
+| JCache member
+| no
+| no
+| no
+| no
+| no
+
+| JCache client
+| yes
+| no
+| yes
+| yes
+| yes
+
+| ReplicatedMap member
+| no
+| no
+| no
+| no
+| no
+
+| ReplicatedMap client
+| yes
+| no
+| no
+| no
+| no
+
+| TransactionalMap member
+| limited
+| no
+| no
+| no
+| no
+
+| TransactionalMap client
+| no
+| no
+| no
+| no
+| no
+|===
+
+NOTE: Even though lite members do not store any data for Hazelcast data structures,
+you can enable Near Cache on lite members for faster reads.
+
+[[configuring-near-cache]]
+== Configuring Near Cache
+
+The following shows the configuration for the Hazelcast Near Cache.
+
+NOTE: If you want to use Near Cache on a Hazelcast member,
+configure it on the member; if you want to use it on a Hazelcast client, configure it on the client.
+
+[tabs]
+====
+XML::
++
+--
+[source,xml]
+----
+<hazelcast>
+    ...
+    <near-cache name="myDataStructure">
+        <in-memory-format>BINARY</in-memory-format>
+        <invalidate-on-change>true</invalidate-on-change>
+        <time-to-live-seconds>0</time-to-live-seconds>
+        <max-idle-seconds>60</max-idle-seconds>
+        <eviction size="1000" max-size-policy="ENTRY_COUNT" eviction-policy="LFU"/>
+        <cache-local-entries>false</cache-local-entries>
+        <local-update-policy>INVALIDATE</local-update-policy>
+        <preloader enabled="true" directory="nearcache-example"
+                   store-initial-delay-seconds="0" store-interval-seconds="0"/>
+    </near-cache>
+    ...
+</hazelcast>
+----
+--
+
+YAML::
++
+[source,yaml]
+----
+hazelcast:
+  near-cache:
+    myDataStructure:
+      in-memory-format: BINARY
+      invalidate-on-change: true
+      time-to-live-seconds: 0
+      max-idle-seconds: 60
+      eviction:
+        size: 1000
+        max-size-policy: ENTRY_COUNT
+        eviction-policy: LFU
+      cache-local-entries: false
+      local-update-policy: INVALIDATE
+      preloader:
+        enabled: true
+        directory: nearcache-example
+        store-initial-delay-seconds: 0
+        store-interval-seconds: 0
+----
+
+Java Member API::
++
+[source,java]
+----
+include::ROOT:example$/performance/ExampleNearCacheConfiguration.java[tag=nearcacheconfig]
+----
+====
+
+The element `<near-cache>` has an optional attribute `name` with a default value of `default`.
+
+The class https://docs.hazelcast.org/docs/{full-version}/javadoc/com/hazelcast/config/NearCacheConfig.html[NearCacheConfig^]
+is used for all supported Hazelcast data structures on members and clients.
+
+The following are the descriptions of all configuration elements and attributes:
+
+* `in-memory-format`: Specifies in which format data is stored in your Near Cache.
+Note that a map's in-memory format can be different from that of its Near Cache. Available values are as follows:
+** `BINARY`: Data is stored in serialized binary format (default value).
+** `OBJECT`: Data is stored in deserialized form.
+** `NATIVE`: Data is stored in the Near Cache that uses Hazelcast's High-Density Memory Store feature.
+This option is available only in Hazelcast Enterprise. Note that a map and
+its Near Cache can independently use High-Density Memory Store.
+For example, while your map does not use High-Density Memory Store, its Near Cache can use it.
+* `invalidate-on-change`: Specifies whether the cached entries are evicted when the entries are updated or removed. Its default value is `true`, which means the cached entries are evicted.
+* `time-to-live-seconds`: Maximum number of seconds for each entry to stay in the Near Cache.
+Entries that are older than this period are automatically evicted from the Near Cache.
+Regardless of the eviction policy used, `time-to-live-seconds` still applies.
+Any integer between 0 and `Integer.MAX_VALUE`. 0 means infinite. Its default value is 0.
+* `max-idle-seconds`: Maximum number of seconds each entry can stay in the Near Cache as untouched (not read).
+Entries that are not read for more than this period are removed from the Near Cache.
+Any integer between 0 and `Integer.MAX_VALUE`. 0 means `Integer.MAX_VALUE`. Its default value is 0.
+* `eviction`: Specifies the eviction behavior when you use High-Density Memory Store for your Near Cache.
+It has the following attributes:
+** `eviction-policy`: Eviction policy configuration. Available values are as follows:
+*** `LRU`: Least Recently Used (default value).
+*** `LFU`: Least Frequently Used.
+*** `NONE`: No items are evicted and the property `max-size` is ignored.
+You can still combine it with `time-to-live-seconds` and `max-idle-seconds` to evict items from the Near Cache.
+*** `RANDOM`: A random item is evicted.
+** `max-size-policy`: Maximum size policy for eviction of the Near Cache. Available values are as follows:
+*** `ENTRY_COUNT`: Maximum size based on the entry count in the Near Cache (default value).
+*** `USED_NATIVE_MEMORY_SIZE`: Maximum used native memory size of the specified Near Cache in MB to trigger the eviction.
+If the used native memory size exceeds this threshold, the eviction is triggered.
+Available only for the `NATIVE` in-memory format. This is supported only by Hazelcast Enterprise.
+*** `USED_NATIVE_MEMORY_PERCENTAGE`: Maximum used native memory percentage of the specified Near Cache to trigger the eviction.
+If the native memory usage percentage (relative to the maximum native memory size) exceeds this threshold, the eviction is triggered.
+Available only for the `NATIVE` in-memory format. This is supported only by Hazelcast Enterprise.
+*** `FREE_NATIVE_MEMORY_SIZE`: Minimum free native memory size of the specified Near Cache in MB to trigger the eviction.
+If the free native memory size goes below this threshold, the eviction is triggered. Available only for the `NATIVE` in-memory format.
+This is supported only by Hazelcast Enterprise.
+*** `FREE_NATIVE_MEMORY_PERCENTAGE`: Minimum free native memory percentage of the specified Near Cache to trigger the eviction.
+If the free native memory percentage (relative to the maximum native memory size) goes below this threshold, the eviction is triggered.
+Available only for the `NATIVE` in-memory format. This is supported only by Hazelcast Enterprise.
+** `size`: Maximum size of the Near Cache used for `max-size-policy`. When this is reached, the Near Cache is evicted based on
+the policy defined. Any integer between `1` and `Integer.MAX_VALUE`. This value has different defaults, depending on the data structure:
+*** `IMap`: Its default value is `Integer.MAX_VALUE` for on-heap maps and `10000` for the `NATIVE` in-memory format.
+*** `JCache`: Its default value is `10000`.
+* `cache-local-entries`: Specifies whether the local entries are cached. It can be useful when the in-memory format of the
+Near Cache is different from that of the map. By default, it is disabled.
+It is only available on Hazelcast members, not on Hazelcast clients (which have no local entries).
+* `local-update-policy`: Specifies the update policy of the local Near Cache.
+It is available on JCache clients. Available values are as follows:
+** `INVALIDATE`: Removes the Near Cache entry on mutation. After the mutative call to the member completes, but before the operation returns to the caller,
+the Near Cache entry is removed. Until the mutative operation completes, readers still continue to read the old value.
+But as soon as the update completes, the Near Cache entry is removed.
+Any threads reading the key after this point will have a Near Cache miss and call through to the member, obtaining the new entry.
+This setting provides read-your-writes consistency. This is the default setting.
+** `CACHE_ON_UPDATE`: Updates the Near Cache entry on mutation. After the mutative call to the member completes, but before the put returns to the caller,
+the Near Cache entry is updated.
+So a remove will remove the entry and one of the put methods will update it to the new value.
+Until the update/remove operation completes, the entry's old value can still be read from the Near Cache.
+But before the call completes, the Near Cache entry is updated. Any threads reading the key after this point read the new entry.
+If the mutative operation was a remove, the key will no longer exist, either in the Near Cache or in the original copy on the member.
+The member initiates an invalidate event for any other Near Caches; however, the caller Near Cache is
+not invalidated, as it already has the new value. This setting also provides read-your-writes consistency.
+* `preloader`: Specifies whether the Near Cache stores and preloads its keys for a faster re-population after
+a Hazelcast client restart. It is only available on IMap and JCache clients; see the sketch after this list. It has the following attributes:
+** `enabled`: Specifies whether the preloader for this Near Cache is enabled, `true` or `false`.
+** `directory`: Specifies the parent directory for the preloader of this Near Cache. The filenames for
+the preloader storage are generated from the Near Cache name. You can additionally specify the parent directory
+to have multiple clients on the same machine with the same Near Cache names.
+** `store-initial-delay-seconds`: Specifies the delay in seconds until the keys of this Near Cache
+are stored for the first time. Its default value is `600` seconds.
+** `store-interval-seconds`: Specifies the interval in seconds at which the keys of this Near Cache are stored.
+Its default value is `600` seconds.
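+
+The preloader settings can also be applied programmatically. A minimal client-side sketch using the `NearCachePreloaderConfig` class (the Near Cache name and directory value are illustrative):
+
+[source,java]
+----
+NearCachePreloaderConfig preloaderConfig = new NearCachePreloaderConfig()
+    .setEnabled(true)
+    .setDirectory("nearcache-example") // parent directory for the key files
+    .setStoreInitialDelaySeconds(600)
+    .setStoreIntervalSeconds(600);
+
+NearCacheConfig nearCacheConfig = new NearCacheConfig("mostlyReadMap")
+    .setPreloaderConfig(preloaderConfig);
+----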
+
+== Near Cache Configuration Examples
+
+This section shows some configuration examples for different Hazelcast data structures.
+
+=== Near Cache Example for IMap
+
+The following are configuration examples for IMap Near Caches for Hazelcast members and clients.
+
+[tabs]
+====
+XML::
++
+--
+[source,xml]
+----
+<hazelcast>
+    ...
+    <map name="mostlyReadMap">
+        <in-memory-format>BINARY</in-memory-format>
+        <near-cache>
+            <in-memory-format>OBJECT</in-memory-format>
+            <invalidate-on-change>false</invalidate-on-change>
+            <time-to-live-seconds>600</time-to-live-seconds>
+            <eviction eviction-policy="NONE" max-size-policy="ENTRY_COUNT" size="5000"/>
+            <cache-local-entries>true</cache-local-entries>
+        </near-cache>
+    </map>
+    ...
+</hazelcast>
+----
+--
+
+YAML::
++
+--
+[source,yaml]
+----
+hazelcast:
+  map:
+    mostlyReadMap:
+      in-memory-format: BINARY
+      near-cache:
+        in-memory-format: OBJECT
+        invalidate-on-change: false
+        time-to-live-seconds: 600
+        eviction:
+          eviction-policy: NONE
+          max-size-policy: ENTRY_COUNT
+          size: 5000
+        cache-local-entries: true
+----
+--
+
+Java API::
++
+[source,java]
+----
+EvictionConfig evictionConfig = new EvictionConfig()
+    .setEvictionPolicy(EvictionPolicy.NONE)
+    .setMaximumSizePolicy(MaxSizePolicy.ENTRY_COUNT)
+    .setSize(5000);
+
+NearCacheConfig nearCacheConfig = new NearCacheConfig()
+    .setInMemoryFormat(InMemoryFormat.OBJECT)
+    .setInvalidateOnChange(false)
+    .setTimeToLiveSeconds(600)
+    .setEvictionConfig(evictionConfig);
+
+Config config = new Config();
+config.getMapConfig("mostlyReadMap")
+    .setInMemoryFormat(InMemoryFormat.BINARY)
+    .setNearCacheConfig(nearCacheConfig);
+----
+====
+
+The Near Cache configuration for maps on members is a child of the map configuration,
+so you do not have to define the map name in the Near Cache configuration.
+
+[tabs]
+====
+XML::
++
+--
+[source,xml]
+----
+<hazelcast-client>
+    ...
+    <near-cache name="mostlyReadMap">
+        <in-memory-format>OBJECT</in-memory-format>
+        <invalidate-on-change>true</invalidate-on-change>
+        <eviction eviction-policy="LRU" max-size-policy="ENTRY_COUNT" size="50000"/>
+    </near-cache>
+    ...
+</hazelcast-client>
+----
+--
+
+YAML::
++
+--
+[source,yaml]
+----
+hazelcast-client:
+  near-cache:
+    mostlyReadMap:
+      in-memory-format: OBJECT
+      invalidate-on-change: true
+      eviction:
+        eviction-policy: LRU
+        max-size-policy: ENTRY_COUNT
+        size: 50000
+----
+--
+
+Java API::
++
+[source,java]
+----
+EvictionConfig evictionConfig = new EvictionConfig()
+    .setEvictionPolicy(EvictionPolicy.LRU)
+    .setMaximumSizePolicy(MaxSizePolicy.ENTRY_COUNT)
+    .setSize(50000);
+
+NearCacheConfig nearCacheConfig = new NearCacheConfig()
+    .setName("mostlyReadMap")
+    .setInMemoryFormat(InMemoryFormat.OBJECT)
+    .setInvalidateOnChange(true)
+    .setEvictionConfig(evictionConfig);
+
+ClientConfig clientConfig = new ClientConfig()
+    .addNearCacheConfig(nearCacheConfig);
+----
+====
+
+The Near Cache on the client side must have the same name as the data structure on the member for which
+this Near Cache is being created. You can use wildcards, so in this example `mostlyRead*` would also match the map `mostlyReadMap`.
+
+A Near Cache can have its own `in-memory-format` which is independent of the `in-memory-format` of the data structure.
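+
+Once configured, the Near Cache is transparent to callers: the same `map.get(k)` call is simply served locally when possible. A minimal sketch, assuming the client configuration above and an already-running cluster:
+
+[source,java]
+----
+HazelcastInstance client = HazelcastClient.newHazelcastClient(clientConfig);
+IMap<String, String> map = client.getMap("mostlyReadMap");
+
+map.get("key1"); // first read: remote call, result is cached locally
+map.get("key1"); // subsequent reads: served from the client's Near Cache
+----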
+
+[tabs]
+====
+XML::
++
+--
+[source,xml]
+----
+<hazelcast-client>
+    ...
+    <near-cache name="mostlyReadCache">
+        <in-memory-format>OBJECT</in-memory-format>
+        <invalidate-on-change>true</invalidate-on-change>
+        <eviction eviction-policy="LRU" max-size-policy="ENTRY_COUNT" size="30000"/>
+        <local-update-policy>CACHE_ON_UPDATE</local-update-policy>
+    </near-cache>
+    ...
+</hazelcast-client>
+----
+--
+
+YAML::
++
+--
+[source,yaml]
+----
+hazelcast-client:
+  near-cache:
+    mostlyReadCache:
+      in-memory-format: OBJECT
+      invalidate-on-change: true
+      eviction:
+        eviction-policy: LRU
+        max-size-policy: ENTRY_COUNT
+        size: 30000
+      local-update-policy: CACHE_ON_UPDATE
+----
+--
+
+Java API::
++
+[source,java]
+----
+EvictionConfig evictionConfig = new EvictionConfig()
+        .setEvictionPolicy(EvictionPolicy.LRU)
+        .setMaxSizePolicy(MaxSizePolicy.ENTRY_COUNT)
+        .setSize(30000);
+
+NearCacheConfig nearCacheConfig = new NearCacheConfig()
+        .setName("mostlyReadCache")
+        .setInMemoryFormat(InMemoryFormat.OBJECT)
+        .setInvalidateOnChange(true)
+        .setEvictionConfig(evictionConfig)
+        .setLocalUpdatePolicy(LocalUpdatePolicy.CACHE_ON_UPDATE);
+
+ClientConfig clientConfig = new ClientConfig()
+        .addNearCacheConfig(nearCacheConfig);
+----
+====
+
+=== Example for Near Cache with High-Density Memory Store
+
+[navy]*Hazelcast Enterprise Feature*
+
+The following is a configuration example for an IMap High-Density Near Cache for a Hazelcast member.
+
+[tabs]
+====
+XML::
++
+--
+[source,xml]
+----
+<hazelcast>
+    ...
+    <map name="mostlyReadMapWithHighDensityNearCache">
+        <in-memory-format>OBJECT</in-memory-format>
+        <near-cache>
+            <in-memory-format>NATIVE</in-memory-format>
+            <eviction eviction-policy="LFU" max-size-policy="USED_NATIVE_MEMORY_PERCENTAGE" size="90"/>
+        </near-cache>
+    </map>
+    ...
+</hazelcast>
+----
+--
+
+YAML::
++
+--
+[source,yaml]
+----
+hazelcast:
+  map:
+    mostlyReadMapWithHighDensityNearCache:
+      in-memory-format: OBJECT
+      near-cache:
+        in-memory-format: NATIVE
+        eviction:
+          eviction-policy: LFU
+          max-size-policy: USED_NATIVE_MEMORY_PERCENTAGE
+          size: 90
+----
+--
+
+Java API::
++
+[source,java]
+----
+EvictionConfig evictionConfig = new EvictionConfig()
+        .setEvictionPolicy(EvictionPolicy.LFU)
+        .setMaxSizePolicy(MaxSizePolicy.USED_NATIVE_MEMORY_PERCENTAGE)
+        .setSize(90);
+
+NearCacheConfig nearCacheConfig = new NearCacheConfig()
+        .setInMemoryFormat(InMemoryFormat.NATIVE)
+        .setEvictionConfig(evictionConfig);
+
+Config config = new Config();
+config.getMapConfig("mostlyReadMapWithHighDensityNearCache")
+        .setInMemoryFormat(InMemoryFormat.OBJECT)
+        .setNearCacheConfig(nearCacheConfig);
+----
+====
+
+Keep in mind that you should have already enabled High-Density Memory Store for your cluster.
+See the xref:storage:high-density-memory.adoc#configuring-high-density-memory-store[Configuring High-Density Memory Store section].
+
+Note that a map and its Near Cache can use High-Density Memory Store independently of each other.
+For example, if your map does not use High-Density Memory Store, its Near Cache can still use it.
+
+== Near Cache Eviction
+
+In the scope of Near Cache, eviction means evicting (clearing) the entries selected according to
+the given `eviction-policy` when the specified `max-size-policy` has been reached.
+
+The `max-size-policy` defines the state when the Near Cache is full and determines whether
+the eviction should be triggered. The `size` is interpreted as an entry count, a memory size or a percentage, depending on the chosen policy.
+
+Once the eviction is triggered, the configured `eviction-policy` determines which entries, if any, must be evicted.
+
+Note that these policies are configured under the `near-cache` configuration block, as seen in the
+Near Cache configuration examples above.
+
+== Near Cache Expiration
+
+Expiration means the eviction of expired records. A record is expired when:
+
+* it is not touched (accessed/read) for `max-idle-seconds`, or
+* `time-to-live-seconds` have passed since it was put into the Near Cache.
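+
+For illustration, both expiration thresholds can also be set programmatically on the Near Cache
+configuration. The following is a minimal sketch; the map name and values are hypothetical:
+
+[source,java]
+----
+// Sketch: entries expire 600s after being put into the Near Cache,
+// or 120s after they were last read, whichever comes first.
+NearCacheConfig nearCacheConfig = new NearCacheConfig("mostlyReadMap")
+        .setTimeToLiveSeconds(600)
+        .setMaxIdleSeconds(120);
+----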
+
+The actual expiration is performed in the following cases:
+
+* When a record is accessed: it is checked whether the record is expired or not.
+If it is expired, it is evicted and `null` is returned as the value to the caller.
+* In the background: there is an expiration task that periodically (currently every 5 seconds) scans records and evicts the expired ones.
+
+Note that `max-idle-seconds` and `time-to-live-seconds` are configured under the `near-cache` configuration block, as seen in the
+Near Cache configuration examples above.
+
+== Near Cache Invalidation
+
+Invalidation is the process of removing an entry from the Near Cache when its value is updated or
+it is removed from the original data structure (to prevent stale reads).
+Near Cache invalidation happens asynchronously at the cluster level, but synchronously on the current member.
+This means that the Near Cache is invalidated within the whole cluster after the modifying operation is finished,
+but on the current member it is updated before the modifying operation is done.
+A modifying operation can be an entry processor, an explicit update or remove, as well as an expiration or eviction.
+Generally, whenever the state of an entry changes in the record store by updating its value or removing it, an invalidation event is sent for that entry.
+
+Invalidations can be sent from members to client Near Caches or to member Near Caches, either individually or in batches.
+The default behavior is to send them in batches. If there are lots of mutating operations such as put/remove on data structures,
+it is advised that you configure batch invalidations.
+This reduces the network traffic and keeps the eventing system less busy, but may increase the delay of individual invalidations.
+
+You can use the following system properties to configure the Near Cache invalidation:
+
+* `hazelcast.map.invalidation.batch.enabled`: Enable or disable batching.
+Its default value is `true`. When it is set to `false`, all invalidations are sent immediately.
+* `hazelcast.map.invalidation.batch.size`: Maximum number of invalidations in a batch. Its default value is `100`.
+* `hazelcast.map.invalidation.batchfrequency.seconds`: If the collected invalidations do not reach the configured batch size,
+a background process sends them periodically. Its default value is `10` seconds.
+
+If there are a lot of clients or many mutating operations, batching should remain enabled and
+the batch size should be set to a suitable value with the `hazelcast.map.invalidation.batch.size` system property.
+
+== Near Cache Consistency
+
+=== Eventual Consistency
+
+Near Caches are invalidated by invalidation events. Invalidation events can be lost due to the fire-and-forget fashion of the eventing system.
+If an event is lost, reads from the Near Cache can remain stale indefinitely.
+
+To solve this problem, Hazelcast provides
+eventually consistent behavior for IMap/JCache Near Caches by detecting invalidation losses.
+After detection of an invalidation loss, stale data is made unreachable and the Near Cache's `get` calls to
+that data are directed to the underlying IMap/JCache to fetch the fresh data.
+
+You can configure eventual consistency with the system properties below (the same properties are valid for both member and client-side Near Caches):
+
+* `hazelcast.invalidation.max.tolerated.miss.count`: Default value is 10.
+If the missed invalidation count exceeds this value, the relevant cached data is made unreachable.
+* `hazelcast.invalidation.reconciliation.interval.seconds`: Default value is 60 seconds.
+This is the interval of a periodic task that scans cluster members and compares generated invalidation events with the ones received by the Near Cache.
+
+=== Locally Initiated Changes
+
+For local invalidations, when a record is updated/removed, future reads see this
+update/remove, providing read-your-writes consistency. To achieve this consistency, the Near Cache configuration provides the following update policies:
+
+* `INVALIDATE`
+* `CACHE_ON_UPDATE`
+
+If you choose `INVALIDATE`, the entry is removed from the Near Cache after the update/remove occurs in
+the underlying data structure and before the operation (get) returns to the caller.
+Until the update/remove operation completes, the entry's old value can still be read from the Near Cache.
+
+If you choose `CACHE_ON_UPDATE`, the entry is updated after the
+update/remove occurs in the underlying data structure and before the operation (put/get) returns to the caller.
+If it is an update operation, the entry is removed and the new value is placed in the Near Cache.
+Until the update/remove operation completes, the entry's old value can still be read from the Near Cache.
+Any threads reading the key after this point read the new entry. If the mutative operation was a remove,
+the key no longer exists in either the Near Cache or the original copy in the member.
+
+== Near Cache Preloader
+
+The Near Cache preloader allows you to store the keys from a Near Cache to provide
+a fast re-population of the previous hot data set after a Hazelcast client has been restarted.
+It is available on IMap and JCache clients.
+
+The Near Cache preloader stores the keys (not the values) of Near Cache entries at regular intervals.
+You can define the initial delay using `store-initial-delay-seconds`; for example, if you know that your hot data set needs
+some time to build up. You can configure the interval using `store-interval-seconds`, which determines how often
+the key set is stored. The persistence does not run continuously. Whenever the storage is scheduled, it is performed on the actual keys in the Near Cache.
+
+The Near Cache preloader is triggered on the first initialization of the data structure on the client, for example, `client.getMap("myNearCacheMap")`.
+This schedules the preloader, which works in the background, so your application is not blocked.
+The storage is enabled after the loading is completed.
+
+The configuration parameter `directory` is optional.
+If you omit it, the base directory becomes the user working directory (normally where the JVM was started, or as
+configured with the system property `user.dir`).
+The storage filenames are always created from the Near Cache name.
+So even if you use a wildcard name in the Near Cache configuration, the preloader filenames are unique.
+
+NOTE: If you run multiple Hazelcast clients with the Near Cache preloader enabled on the same machine,
+you have to configure a unique storage filename for each client or run them from different user directories.
+If two clients write into the same file, only the first client succeeds.
+The subsequent clients throw an exception as soon as the Near Cache preloader is triggered.
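+
+As an illustration, the preloader can be configured programmatically as sketched below; the directory
+and map name are hypothetical, and the delay/interval values simply restate the defaults:
+
+[source,java]
+----
+// Sketch: store the Near Cache key set every 600s into a per-client directory,
+// so two clients on the same machine do not write into the same file.
+NearCachePreloaderConfig preloaderConfig = new NearCachePreloaderConfig()
+        .setEnabled(true)
+        .setDirectory("near-cache-preloader/client-1")
+        .setStoreInitialDelaySeconds(600)
+        .setStoreIntervalSeconds(600);
+
+NearCacheConfig nearCacheConfig = new NearCacheConfig("mostlyReadMap")
+        .setPreloaderConfig(preloaderConfig);
+----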
+
diff --git a/docs/modules/cluster-performance/pages/performance-tips.adoc b/docs/modules/cluster-performance/pages/performance-tips.adoc
new file mode 100644
index 000000000..dca0287ed
--- /dev/null
+++ b/docs/modules/cluster-performance/pages/performance-tips.adoc
@@ -0,0 +1,714 @@
+= Performance Tips
+:description: The production checklist provides a set of best practices and recommendations to ensure a smooth transition to a production environment that runs a Hazelcast cluster.
+[[production-checklist]]
+
+To achieve good performance in your Hazelcast deployment, it is crucial to tune your
+production environment. This section provides guidelines for tuning performance; we also
+recommend running performance and stress tests to evaluate the application performance.
+
+[[basic-recs]]
+== Basic Recommendations
+
+* Eight cores per Hazelcast server instance
+* Minimum of 8 GB RAM per Hazelcast member (if not using the High-Density Memory Store)
+* Dedicated NIC for each Hazelcast member
+* Linux (any distribution)
+* Run all members within the same subnet
+* Attach all members to the same network switch
+
+=== Using Operation Threads Efficiently
+
+By default, Hazelcast uses the machine's core count to determine the number of operation threads. Creating more
+operation threads than this core count is highly unlikely to lead to improved performance, since it results in more context
+switching, more thread notification, and so on.
+
+Especially if you have a system that does simple operations like put and get,
+it is better to use a lower thread count than the number of cores.
+The reason a reduced thread count can increase performance
+is that the operations executed on the operation threads normally execute very fast and there can
+be a very significant amount of overhead caused by thread parking and unparking. If there are fewer threads, a thread needs
+to do more work, blocks less and therefore needs to be notified less.
+
+=== Avoiding Random Changes
+
+Tweaking can be very rewarding because significant performance improvements are possible. By default, Hazelcast tries
+to behave at its best for all situations, but this doesn't always lead to the best performance. So if you know what
+you are doing and what to look for, it can be very rewarding to tweak. However, it is also important that tweaking is
+done with proper testing to see if there is actually an improvement. Tweaking without proper benchmarking
+is likely to lead to confusion and could cause all kinds of problems. In case of doubt, we recommend not to tweak.
+
+=== Creating the Right Benchmark Environment
+
+When benchmarking, it is important that the benchmark reflects your production environment. Sometimes with a calculated
+guess, a representative smaller environment can be set up; but if you want to use the benchmark statistics to infer
+how your production system is going to behave, you need to make sure that you get as close to your production setup as
+possible. Otherwise, you are at risk of spotting the issues too late or focusing on things that are not relevant.
+
+== Hardware
+
+**Uniform Hardware:**
+
+To maximize the efficiency and performance of Hazelcast, it's crucial to ensure that all cluster members are equipped with equal CPU, memory, and network resources. This uniformity prevents any single slow member from impeding the overall cluster performance.
+One effective strategy in achieving this is to allocate dedicated machine resources exclusively for Hazelcast services.
+
+By providing properly sized hardware or virtual hardware to each member, Hazelcast ensures that all members have ample resources without competing with other processes or services. This approach allows Hazelcast to distribute load evenly across all members and maintain predictable performance. In heterogeneous clusters where some machines are more powerful than others, weaker members can create bottlenecks, leading to underutilization of stronger members. Therefore, for optimal performance, it's advisable to use equivalent hardware for all Hazelcast members.
+
+**Minimal Recommendation:**
+
+Hazelcast is a lightweight framework and is reported to run well on devices
+such as the Raspberry Pi Zero (1GHz single-core CPU, 512MB RAM).
+
+**Recommended Configuration:**
+
+We suggest at least 8 CPU cores or equivalent per member, as well as running a single Hazelcast member for each host.
+
+NOTE: For environments with either fewer or more than 8 CPU cores, we recommend enabling Thread-Per-Core (TPC). For more info, see xref:cluster-performance:thread-per-core-tpc.adoc[].
+
+As a starting point for data-intensive operations, consider machines such as AWS https://aws.amazon.com/ec2/instance-types/c5/[c5.2xlarge]
+with:
+
+* 8 CPU cores
+* 16 GB RAM
+* 10 Gbps network
+
+**Single Member per Machine:**
+
+A Hazelcast member assumes it is alone on a machine, so we recommend not running multiple
+Hazelcast members on a machine. Having multiple
+members on a single machine is likely to result in worse performance than
+running a single member, since there will be more
+context switching, less batching, and so on. So unless it is proven that running multiple members on each machine does give a better
+performance/behavior in your particular setup, it is best to run a single member per machine.
+
+**CPU:**
+
+Hazelcast can use hundreds of CPU cores efficiently by exploiting data and
+task parallelism. Adding more CPU can therefore help with scaling the
+CPU-bound computations. If you're using jobs and pipelines, read about the
+xref:architecture:distributed-computing.adoc#cooperative-execution-engine[Execution model]
+to understand how Hazelcast makes the computation parallel and design your pipelines accordingly.
+
+By default, Hazelcast uses all available CPU. Starting two Hazelcast
+instances on one machine therefore doesn't bring any performance benefit,
+as the instances would compete for the same CPU resources.
+
+Don't rely just on CPU usage when benchmarking your cluster. Simulate
+production workload and measure the throughput and latency instead. The
+task manager of Hazelcast can be configured to use the CPU aggressively.
+As an example, see https://hazelcast.com/blog/idle-green-threads-in-jet/[this benchmark]: the CPU usage was close to 20% with just 1000 events/s. At 1 million items/s
+the CPU usage was 100%, even though Jet could still push around 5 million
+items/s on that machine.
+
+**Disk:**
+
+Hazelcast is an in-memory framework. Cluster disks aren't involved in regular
+operations except for logging and thus are not critical for the cluster
+performance. There are optional features of Hazelcast (such as Persistence and
+CP Persistence) which can use disk space, but even when they are in use, a
+Hazelcast system is primarily in-memory.
+
+Consider using more performant disks if you use the following Hazelcast features:
+
+* xref:pipelines:sources-sinks.adoc[The file connector] for reading or writing to files on the cluster's file system.
+* xref:storage:persistence.adoc[Persistence] for saving map data to disk.
+* xref:cp-subsystem:persistence.adoc[CP Persistence] for strong resiliency guarantees when using the CP Subsystem.
+
+== Operating System
+
+Hazelcast works in many operating environments and some environments
+have unique considerations. These are highlighted below.
+
+As a general suggestion, we recommend turning off swapping at the operating system level; see <<disabling-swap-usage, Disable Swap Usage>>.
+
+[[solaris]]**Solaris:**
+
+Hazelcast is certified for Solaris SPARC.
+
+However, the following modules are not supported for the Solaris operating system:
+
+- `hazelcast-jet-grpc`
+- `hazelcast-jet-protobuf`
+- `hazelcast-jet-python`
+
+**Disable Transparent Huge Pages (THP):**
+
+Transparent Huge Pages (THP) is a Linux memory management
+feature which aims to improve application performance by
+using larger memory pages. In most cases it works fine,
+but for databases and in-memory data grids it usually causes a significant performance drop.
+Since it's enabled on most Linux distributions, we recommend disabling
+it when you run Hazelcast.
+
+Use the following commands to check if it's enabled:
+
+```
+cat /sys/kernel/mm/transparent_hugepage/enabled
+cat /sys/kernel/mm/transparent_hugepage/defrag
+```
+
+Or the alternative commands if you run RHEL:
+
+```
+cat /sys/kernel/mm/redhat_transparent_hugepage/enabled
+cat /sys/kernel/mm/redhat_transparent_hugepage/defrag
+```
+
+To disable it permanently, see the corresponding documentation
+for the Linux distribution that you use. Here is an example of the instructions
+for RHEL: https://access.redhat.com/solutions/46111.
+
+[[disabling-swap-usage]]
+**Disable Swap Usage:**
+
+Swapping behavior can be configured by setting the kernel parameter
+(`/proc/sys/vm/swappiness`) and can be turned off completely by executing
+`swapoff -a` as the root user in Linux systems. We highly recommend turning
+off swapping on the machines that run Hazelcast. When your operating system
+starts swapping, garbage collection activities take much longer due to the low speed of disk access.
+
+The Linux kernel parameter, `vm.swappiness`, is a value from 0-100 that controls
+the swapping of application data from physical memory to virtual memory on disk.
+The higher the parameter value, the more aggressively inactive processes are
+swapped out from physical memory. The lower the value, the less they are swapped,
+forcing filesystem buffers to be emptied. If swapping needs to be kept enabled,
+we recommend lowering the default value of 60 to a value between 0 and 10 to prevent
+the Linux kernel from starting to swap memory to disk too early:
+
+```
+sudo sysctl vm.swappiness=10
+```
+
+**VMWare ESX:**
+
+Hazelcast is certified on VMWare VSphere 5.5/ESXi 6.0.
+Generally speaking, Hazelcast can use all the resources on a full machine.
+Splitting a single physical machine into multiple virtual machines and
+thereby dividing resources is not required.
+
+Consider the following for VMWare ESX:
+
+* Avoid sharing one Network Interface Card (NIC) between multiple virtual machine environments. A Hazelcast cluster is a distributed system and can be very network-intensive.
+Trying to share one physical NIC between multiple VMs may cause network-related performance problems.
+* Avoid over-committing memory. Always use dedicated physical memory for guests running Hazelcast.
+* Do not use memory ballooning.
+* Be careful overcommitting CPU cores. Monitor CPU steal time metrics.
+* Do not move guests while Hazelcast is running - for ESX this means disabling vMotion. If you want to use vMotion (live migration), first stop the Hazelcast cluster then restart it after the migration completes.
+* Always enable verbose garbage collection (GC) logs in the Java Virtual Machine. When "Real" time is higher than "User" time, this may indicate virtualization issues. The JVM is not using the CPU to execute application code during garbage collection, and is probably waiting on input/output (I/O) operations.
+* Be aware of the network types offered for VMWare guests.
+* Use pass-through hard disks/partitions; do not use image files.
+* Configure partition groups to use a separate underlying physical machine for partition backups.
+* If you want to use automatic snapshots, first stop the Hazelcast cluster then restart it after the snapshot.
+* Network performance issues, including timeouts, might occur with LRO (Large Receive Offload)
+enabled on Linux virtual machines and ESXi/ESX hosts. We have specifically had
+this reported in VMware environments, but it could potentially impact other environments as well.
+We strongly recommend disabling LRO when running in virtualized environments, see https://kb.vmware.com/s/article/1027511.
+
+**Windows:**
+
+According to a rare reported case, I/O threads can consume a lot of CPU cycles
+unexpectedly, even in an idle state. This can lead to CPU usage going up to 100%.
+This is reported not only for Hazelcast but for other GitHub projects as well.
+The workaround for such cases is to supply the system property `-Dhazelcast.io.selectorMode=selectwithfix` on JVM startup.
+See the related https://github.com/hazelcast/hazelcast/issues/7943#issuecomment-218586767[GitHub^] issue for more details.
+
+[[network-tuning]]
+== Network
+
+Hazelcast uses the network internally to shuffle data and to replicate the
+backups. The network is also used to read input data from and to write
+results to remote systems, or to do RPC calls when enriching. In fact, a
+lot of Hazelcast jobs are network-bound. A 1 Gbit network connection is the
+recommended minimum, but using a 10 Gbit or faster network
+can improve application performance. Also consider scaling the cluster
+out (adding more members to the cluster) to distribute the load.
+
+Consider colocating a Hazelcast cluster with the data source and sink to avoid
+moving data back and forth over the wire. If you must choose between colocating
+Hazelcast with the source or sink, choose the source. Processed results are often
+aggregated, so the size is reduced.
+
+A Hazelcast cluster is designed to run in a single LAN and can encounter unexpected
+performance problems if a single cluster is split across multiple different networks.
+Latency is the strongest constraint in most network scenarios, so deploying Hazelcast
+clusters to a network with high or varying latencies (even on the same LAN) can lead
+to unpredictable performance results.
+
+=== Dedicated Network Interface Controller for Hazelcast Members
+
+Provisioning a dedicated physical network interface controller (NIC) for
+Hazelcast members ensures smooth flow of data, including business
+data and cluster health checks, across servers.
+Sharing network interfaces
+between a Hazelcast member and another application could result in choking the port,
+thus causing unpredictable cluster behavior.
+
+=== TCP Buffer Size
+
+TCP uses a congestion window to determine how many packets it
+can send at one time; the larger the congestion window, the higher the throughput.
+The maximum congestion window is related to the amount of buffer
+space that the kernel allocates for each socket. For each socket,
+there is a default value for the buffer size, which you can change by using
+a system library call just before opening the socket. You can adjust
+the buffer sizes for both the receiving and sending sides of a socket.
+
+To achieve maximum throughput, it is critical to use the optimal TCP
+socket buffer sizes for the links you are using to transmit data.
+If the buffers are too small, the TCP congestion window will never open up fully,
+therefore throttling the sender. If the buffers are too large,
+the sender can overrun the receiver such that the sending host is
+faster than the receiving host, which causes the receiver to drop packets
+and the TCP congestion window to shut down.
+
+Typically, you can determine the throughput by the following formulae:
+
+* Transactions per second = buffer size / latency
+* Buffer size = round trip time * network bandwidth
+
+Hazelcast, by default, configures I/O buffers to 128KB; you can change these
+using the following Hazelcast properties:
+
+* `hazelcast.socket.receive.buffer.size`
+* `hazelcast.socket.send.buffer.size`
+
+The operating system has separate configuration for minimum, default and maximum socket buffer sizes, so it is not guaranteed that the socket buffers allocated to Hazelcast sockets will match the requested buffer size.
+
+On Linux, the following kernel parameters can be used to configure socket buffer sizes:
+
+* `net.core.rmem_max`: maximum socket receive buffer size in bytes
+* `net.core.wmem_max`: maximum socket send buffer size in bytes
+* `net.ipv4.tcp_rmem`: minimum, default and maximum receive buffer size per TCP socket
+* `net.ipv4.tcp_wmem`: minimum, default and maximum send buffer size per TCP socket
+
+To make a temporary change to one of these values, use `sysctl`:
+```
+$ sysctl net.core.rmem_max=2097152
+$ sysctl net.ipv4.tcp_rmem="8192 131072 6291456"
+```
+
+To apply changes permanently, edit the `/etc/sysctl.conf` file, for example:
+
+```
+$ vi /etc/sysctl.conf
+net.core.rmem_max = 2097152
+net.ipv4.tcp_rmem = 8192 131072 6291456
+```
+
+Check your Linux distribution's documentation for more information about configuring kernel parameters.
+
+== JVM
+
+Here are the essential tips:
+
+* Enable garbage collection (GC) logs. Since Java is getting better and better at GC, use the latest LTS version; G1GC is the default recommended GC policy.
+* Use High-Density Memory Store and a small heap; minimum and maximum heap size should be equal.
+* Applications that do a lot of querying or data updates need more headroom.
+* Basic tuning brings a huge benefit, whereas further tuning may bring little except complexity; no tuning is recommended unless needed.
+* Tuning, if done, should be reviewed periodically.
+
+=== Garbage Collection
+
+Keeping track of GC statistics is vital to optimum performance,
+especially if you run the JVM with large heap sizes. Tuning the garbage collector
+for your use case is often a critical performance practice prior to deployment.
+Likewise, knowing what baseline GC behavior looks like and
+monitoring for behavior outside normal tolerances will keep you aware of
+potential memory leaks and other pathological memory usage.
+
+To avoid long GC pauses and latencies from the Java Virtual Machine (JVM), we recommend 16 GB or less of maximum JVM heap. If xref:storage:high-density-memory.adoc[High-Density Memory] is enabled, no more than 8 GB of maximum JVM heap is recommended. Horizontal scaling of JVM memory is recommended over vertical scaling if you want to exceed these numbers.
+
+Enabling GC logs allows troubleshooting if performance problems occur. To enable GC
+logging, use the following JVM arguments:
+
+```
+-Xlog:gc=debug:file=/tmp/gc.log:time,uptime,level,tags:filesize=100m,filecount=10
+```
+
+=== Minimize Heap Usage
+
+The best way to minimize the performance impact of GC
+is to keep heap usage small. Maintaining a small heap saves countless
+hours of GC tuning and provides improved stability
+and predictability across your entire application.
+Even if your application uses very large amounts of data, you can still keep
+your heap small by using Hazelcast's High-Density Memory Store.
+
+=== Azul Zing® and Zulu® Support
+
+Azul Systems, the industry’s only company exclusively focused on
+Java and the Java Virtual Machine (JVM), builds fully supported,
+certified standards-compliant Java runtime solutions that help
+enable real-time business. Zing is a JVM designed for enterprise
+Java applications and workloads that require any combination of low
+latency, high transaction rates, large working memory, and/or consistent
+response times. Zulu and Zulu Enterprise are Azul’s certified, freely available
+open source builds of OpenJDK with a variety of flexible support options,
+available in configurations for the enterprise as well as custom and embedded systems.
+Azul Zing is certified and supported in Hazelcast Enterprise. When deployed with Zing,
+Hazelcast gains performance, capacity, and operational efficiency within the same infrastructure.
+Additionally, you can directly use Hazelcast with Zulu without making any changes to your code.
+
+== Query Tuning
+
+=== Indexes for Queried Fields
+
+For queries on fields with ranges, you can use an ordered index.
+Hazelcast, by default, caches the deserialized form of the object under
+query in the memory when inserted into an index. This removes the overhead
+of object deserialization per query, at the cost of increased heap usage.
+See the xref:query:indexing-maps.adoc#indexing-ranged-queries[Indexing Ranged Queries section].
+
+=== Composite Indexes
+
+Composite indexes are built on top of multiple map entry
+attributes and can thus increase the performance of complex queries significantly
+when used correctly. See the xref:query:indexing-maps.adoc#composite-indexes[Composite Indexes section].
+
+=== Parallel Query Evaluation & Query Thread Pool
+
+Setting the `hazelcast.query.predicate.parallel.evaluation` property
+to `true` can speed up queries when using slow predicates or when there is a huge
+amount of entries per member.
+
+If you're using queries heavily, you can benefit from increasing the query thread pool size.
+See the xref:query:querying-maps-predicates.adoc#configuring-the-query-thread-pool[Configuring the Query Thread Pool section].
+
+=== In-Memory Format for Queries
+
+Setting the queried entries' in-memory format to `OBJECT` forces the objects
+to always be kept in object format, resulting in faster access for queries, but also in
+higher heap usage.
+It will also incur an object serialization step on every remote get operation. See the xref:data-structures:setting-data-format.adoc[Setting In-Memory Format section].
+
+== Serialization Tuning
+
+Hazelcast supports a range of object serialization mechanisms,
+each with their own costs and benefits. Choosing the best serialization
+scheme for your data and access patterns can greatly increase the performance
+of your cluster.
+
+For an overview of serialization options with comparative advantages and disadvantages, see xref:serialization:serialization.adoc[].
+
+[[serialization-opt-recommendations]]
+=== Serialization Optimization Recommendations
+
+* Use `IMap.set()` on maps instead of `IMap.put()` if you don’t
+need the old value. This eliminates unnecessary deserialization of the old value.
+* Set `use-native-byte-order` and `allow-unsafe` to `true` in Hazelcast's serialization configuration.
+Setting these properties to `true` enables fast copy of primitive
+arrays like `byte[]`, `long[]`, etc., in your object.
+* Compression is supported only by `Serializable` and
+`Externalizable`. It has not been applied to other serialization methods
+because it is much slower (around three orders of magnitude slower than
+not using compression) and consumes a lot of CPU. However, it can
+reduce binary object size by an order of magnitude.
+* When `enable-shared-object` is set to `true`, the Java serializer will
+back-reference an object pointing to a previously serialized instance.
+If set to `false`, every instance is considered unique and copied separately
+even if they point to the same instance. Its default value is `false`.
+
+See also the xref:serialization:serialization-configuration.adoc[Serialization Configuration Wrap-Up section] for details.
+
+[[exec-svc-opt]]
+== Executor Service
+
+Hazelcast executor service is an extension of Java’s built-in executor service
+that allows distributed execution and control of tasks. There are a number of
+options for the Hazelcast executor service that have an impact on performance, as summarized below.
+
+=== Number of Threads
+
+An executor queue may be configured to have a specific number of
+threads dedicated to executing enqueued tasks. Set the number of
+threads (`pool-size` property in the executor service configuration)
+appropriate to the number of cores available for execution.
+Too few threads will reduce parallelism, leaving cores idle, while too
+many threads will cause context switching overhead.
+See the xref:computing:executor-service.adoc#configuring-executor-service[Configuring Executor Service section].
+
+=== Bounded Execution Queue
+
+An executor queue may be configured to have a maximum number
+of tasks (`queue-capacity` property in the executor service configuration).
+Setting a bound on the number of enqueued tasks
+will put explicit back pressure on enqueuing clients by throwing
+an exception when the queue is full. This will avoid the overhead
+of enqueuing a task only for it to be canceled because its execution
+takes too long. It will also allow enqueuing clients to take corrective
+action rather than blindly filling up work queues with tasks faster than they can be executed.
+See the xref:computing:executor-service.adoc#configuring-executor-service[Configuring Executor Service section].
+
+=== Avoid Blocking Operations in Tasks
+
+Any time spent blocking or waiting in a running task is thread
+execution time wasted while other tasks wait in the queue.
+Tasks should be written such that they perform no potentially
+blocking operations (e.g., network or disk I/O) in their `run()` or `call()` methods.
+
+=== Locality of Reference
+
+By default, tasks may be executed on any member. Ideally, however,
+tasks should be executed on the same machine that contains
+the data the task requires to avoid the overhead of moving remote data to
+the local execution context. Hazelcast executor service provides a number of
+mechanisms for optimizing locality of reference.
+
+* Send tasks to a specific member: using `IExecutorService.executeOnMember()`,
+you may direct execution of a task to a particular member
+* Send tasks to a key owner: if you know a task needs to operate on a
+particular map key, you may direct execution of that task to the member
+that owns that key
+* Send tasks to all or a subset of members: if, for example, you need to operate
+on all the keys in a map, you may send tasks to all members such that each task
+operates on the local subset of keys, then return the local result for
+further processing
+
+=== Scaling Executor Services
+
+If you find that your work queues consistently reach their maximum
+and you have already optimized the number of threads and locality
+of reference, and removed any unnecessary blocking operations in your tasks,
+you may first try to scale up the hardware of the overburdened members
+by adding cores and, if necessary, more memory.
+
+When you have reached diminishing returns on scaling up
+(such that the cost of upgrading a machine outweighs the benefits of the upgrade),
+you can scale out by adding more members to your cluster.
+The distributed nature of Hazelcast is perfectly suited to scaling out,
+and you may find in many cases that it is as easy as just configuring and
+deploying additional virtual or physical hardware.
+
+=== Executor Services Guarantees
+
+In addition to the regular distributed executor service,
+Hazelcast also offers durable and scheduled executor services.
+Note that when a member failure occurs, durable and scheduled executor
+services come with an "at least once execution of a task" guarantee,
+while the regular distributed executor service has none.
+See the xref:computing:durable-executor-service.adoc[Durable] and xref:computing:scheduled-executor-service.adoc[Scheduled] executor services.
+
+=== Work Queue Is Not Partitioned
+
+Each member-specific executor will have its own private work queue.
+Once a job is placed on that queue, it will not be taken by another member.
+This may lead to a condition where one member has a lot of unprocessed
+work while another is idle. This could be the result of an application
+call such as the following:
+
+```
+for (;;) {
+    executorService.submitToMember(task, member);
+}
+```
+
+This could also be the result of an imbalance caused by the application,
+such as in the following scenario: all products by a particular manufacturer
+are kept in one partition. When a new, very popular product gets released
+by that manufacturer, the resulting load puts a huge pressure on that
+single partition while others remain idle.
+
+=== Work Queue Has Unbounded Capacity by Default
+
+This can lead to `OutOfMemoryError` because the number of queued tasks
+can grow without bounds. This can be solved by setting the `queue-capacity` property
+in the executor service configuration. If a new task is submitted while the queue
+is full, the call will not block, but will immediately throw a
+`RejectedExecutionException` that the application must handle.
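+
+As an illustrative sketch (the executor name and `task` are hypothetical), a submitting client can
+treat the `RejectedExecutionException` as a back-pressure signal:
+
+[source,java]
+----
+IExecutorService executorService = hazelcastInstance.getExecutorService("bounded-executor");
+try {
+    executorService.submit(task);
+} catch (RejectedExecutionException e) {
+    // queue-capacity has been reached: take corrective action,
+    // e.g. retry later with a delay or shed the load
+}
+----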
+
+=== No Load Balancing
+
+There is currently no load balancing available for tasks that can run
+on any member. If load balancing is needed, it may be done by creating an
+executor service proxy that wraps the one returned by Hazelcast.
+Using the members from the `ClusterService` or member information from
+`SPI:MembershipAwareService`, it could route "free" tasks to a specific member based on load.
+
+=== Destroying Executors
+
+An executor service must be shut down with care because it will
+shut down all corresponding executors in every member, and subsequent
+calls to the proxy will result in a `RejectedExecutionException`.
+When the executor is destroyed and `HazelcastInstance.getExecutorService` is later called
+with the ID of the destroyed executor, a new executor will be created
+as if the old one never existed.
+
+=== Exceptions in Executors
+
+When a task fails with an exception (or an error), this exception
+will not be logged by Hazelcast by default. This comports with the
+behavior of Java’s thread pool executor service, but it can make debugging difficult.
+There are, however, some easy remedies: either add a try/catch in your runnable and
+log the exception, or wrap the runnable/callable in a proxy that does the logging;
+the latter option keeps your code a bit cleaner.
+
+[[client-exec-pool-size]]
+=== Client Executor Pool Size
+
+Hazelcast clients use an internal executor service
+(different from the distributed executor service) to perform some of
+their internal operations. By default, the thread pool for that executor service
+is configured to be the number of cores on the client machine times five; e.g., on a 4-core
+client machine, the internal executor service will have 20 threads.
+In some cases, increasing that thread pool size may increase performance.
+
+[[ep]]
+== Entry Processors
+
+Hazelcast allows you to update the whole or a
+part of map or cache entries in an efficient, lock-free way using
+entry processors.
+
+By default, an entry processor executes on a partition thread. A partition thread is responsible for handling
+one or more partitions. The entry processor design assumes that the user code in the `process()` method executes fast.
+In the pathological case where the code is very heavy and takes multiple milliseconds to execute, this may create a bottleneck.
+
+We have a slow user code detector which can be used to log a warning,
+controlled by the following system properties:
+
+* `hazelcast.slow.operation.detector.enabled` (default: true)
+* `hazelcast.slow.operation.detector.threshold.millis` (default: 10000)
+
+include::clusters:partial$ucn-migrate-tip.adoc[]
+
+The defaults catch extremely slow operations, but you should set this
+much lower, say to 1ms, at development time to catch entry processors
+that could be problematic in production. These are good candidates for our optimizations.
+
+We have two optimizations:
+
+* `Offloadable` which moves execution off the partition thread to an executor thread
+* `ReadOnly` which means we can avoid taking a lock on the key
+
+These are enabled very simply by implementing these interfaces in your entry processor.
+These optimizations apply to the following map methods only:
+
+* `executeOnKey(Object, EntryProcessor)`
+* `submitToKey(Object, EntryProcessor)`
+* `submitToKey(Object, EntryProcessor, ExecutionCallback)`
+
+See the xref:computing:entry-processor.adoc[Entry Processors section].
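+
+As a minimal sketch of these optimizations (the `Employee` value type and processor logic are hypothetical),
+an entry processor that only reads an entry can implement both interfaces:
+
+[source,java]
+----
+// Runs on an executor thread instead of the partition thread (Offloadable)
+// and takes no lock on the key (ReadOnly), because it never mutates the entry.
+public class ReadNameProcessor implements EntryProcessor<String, Employee, String>,
+        Offloadable, ReadOnly {
+
+    @Override
+    public String process(Map.Entry<String, Employee> entry) {
+        Employee employee = entry.getValue();
+        return employee == null ? null : employee.getName();
+    }
+
+    @Override
+    public String getExecutorName() {
+        return Offloadable.OFFLOADABLE_EXECUTOR;
+    }
+}
+----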
+
+[[tls-ssl-perf]]
+== Security
+
+Here are the essential tips:
+
+* Security probably won’t be the first thing built
+* But it needs to be considered from the outset, as it affects architecture, performance and coding
+* Security can then be added before go-live without rework
+
+TLS/SSL can have a significant impact on performance. There are a few ways to
+increase the performance.
+
+The first thing that can be done is making sure that AES intrinsics are used.
+Modern CPUs (Westmere, 2010, or newer) have hardware support for AES encryption/decryption
+and the JIT automatically makes use of these AES intrinsics. They can also be
+explicitly enabled using `-XX:+UseAES -XX:+UseAESIntrinsics`,
+or disabled using `-XX:-UseAES -XX:-UseAESIntrinsics`.
+
+A lot of encryption algorithms make use of padding because they encrypt/decrypt in
+fixed-size blocks. If there is not enough data
+for a block, the algorithm relies on random number generation to pad. Under Linux,
+the JVM automatically makes use of `/dev/random` for
+the generation of random numbers. `/dev/random` relies on entropy to be able to
+generate random numbers. However, if this entropy is
+insufficient to keep up with the rate requiring random numbers, it can slow down
+the encryption/decryption since `/dev/random` will
+block; it could block for minutes waiting for sufficient entropy. This can be fixed
+by setting the `-Djava.security.egd=file:/dev/./urandom` system property.
+For a more permanent solution, modify the
+`/jre/lib/security/java.security` file, look for the
+`securerandom.source` setting and change it
+to `securerandom.source=file:/dev/./urandom`. Switching to `/dev/urandom` could
+be controversial because `/dev/urandom` will not
+block if there is a shortage of entropy and the returned random values could
+theoretically be vulnerable to a cryptographic attack.
+If this is a concern in your application, use `/dev/random` instead.
+
+Hazelcast's Java smart client automatically makes use of extra I/O threads
+for encryption/decryption and this has a significant impact on performance.
+This can be changed using the `hazelcast.client.io.input.thread.count` and
+`hazelcast.client.io.output.thread.count` client system properties.
+By default it is 1 input thread and 1 output thread. If TLS/SSL is enabled,
+it defaults to 3 input threads and 3 output threads.
+Having more client I/O threads than members in the cluster does not lead to
+increased performance. So with a 2-member cluster,
+2 input and 2 output threads give the best performance.
+
+[[hd]]
+== High-Density Memory Store
+
+Hazelcast's High-Density Memory Store (HDMS) is an in-memory storage
+option that uses native, off-heap memory to store object data
+instead of the JVM heap. This allows you to keep data in the memory without
+incurring the overhead of garbage collection (GC). HDMS capabilities are supported by
+the map structure, JCache implementation, Near Cache, Hibernate caching, and web session replication.
+
+Available to Hazelcast Enterprise customers, HDMS is an ideal solution
+for those who want the performance of in-memory data, need the predictability
+of well-behaved Java memory management, and don’t want to spend time
+and effort on meticulous and fragile GC tuning.
+
+If you use HDMS with large data sizes,
+we recommend a large increase in partition count, starting with 5009 or higher. See the
+<<partitions, Partitions>> section below for more information.
+Also, if you intend
+to preload very large amounts of data into memory (tens, hundreds, or thousands of gigabytes),
+be sure to profile the data load time and to take that startup time into account prior to deployment.
+
+See the xref:storage:high-density-memory.adoc[HDMS section] to learn more.
+
+== Cluster Size
+
+Here are the essential tips:
+
+* Split-brain is a network break; it affects hosts
+* You can’t stop a network from a physical or logical break
+* If you have an even number of hosts, you make the problem worse
+* If you have an odd number of hosts, you make the solution simpler
+* Use an odd number of CP members
+
+[[many-members]]
+=== Clusters with Huge Amount of Members/Clients
+
+Very large clusters of hundreds of members are possible with Hazelcast,
+but stability depends heavily on your network infrastructure and
+ability to monitor and manage that many members. Distributed executions
+in such an environment will be more sensitive to your application's
+handling of execution errors, timeouts, and the optimization of task code.
+
+In general, you get better results with smaller clusters of Hazelcast members
+running on more powerful hardware and a higher number of Hazelcast clients.
+When running large numbers of clients, network stability is still a significant factor
+in overall stability. If you are running in Amazon EC2, hosting clients
+and members in the same zone is beneficial. Using Near Cache on read-mostly
+data sets reduces server load and network overhead. You may also try increasing
+the number of threads in the client executor pool.
+
+=== Data Amount
+
+Total data size should be calculated based on the combination of primary data and backup data. For example, if you have configured your cluster with a backup count of 2, then total memory consumed is actually 3x larger than the primary data size (primary + backup + backup). Partition sizes of 50MB or less are recommended.
+
+=== Map Entries
+
+Since entries with large size can bloat the network when deserialized,
+we recommend keeping each map entry's size below 1 MB, and keeping the sizes of map entries relatively equal to each other.
+
+Hazelcast Platform can store terabytes of data, but having a single entry with a large size may cause stability issues. If you have such entries, you should redesign your domain objects and break them into smaller ones.
+
+[[partitions]]
+=== Partitions
+
+The number of internal partitions a Hazelcast member uses can be xref:ROOT:capacity-planning.adoc#partition-count[configured], but must be uniform across all members in the cluster.
+An optimal partition count and size establish a balance between the
+number of partitions on each member and the data amount on each partition.
+You can consider the following when deciding on a partition count; a configuration sketch follows this list.
+
+* The partition count should be a prime number. This helps to minimize the collision of keys across
+partitions, ensuring more consistent lookup times.
+* A partition count which is too low constrains the cluster. The count should
+be large enough for a balanced data or task distribution so that each member
+does not manage too few partitions.
+* A partition size of 50MB or less typically ensures good performance. Larger clusters may be able to use up to 100MB partition sizes, but will likely also require larger JVM heap sizes to accommodate the increase in data flow.
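+
+As a sketch, the partition count can be set through the `hazelcast.partition.count` property;
+the value below is just an example of a prime count:
+
+[source,java]
+----
+// Every member must use the same partition count.
+Config config = new Config();
+config.setProperty("hazelcast.partition.count", "5009");
+----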
+
+If you are a Hazelcast Enterprise customer using the High-Density Data Store with large data sizes,
+we recommend a large increase in partition count, starting with 5009 or higher.
+
+The partition count cannot be easily changed after a cluster is created, so if you have a large cluster, be sure to test and set an optimum partition count prior to deployment. If you need to change the partition count after a cluster is already running, you will need to schedule a maintenance window to entirely bring the cluster down. If your cluster uses the xref:storage:persistence.adoc[Persistence] or xref:cp-subsystem:persistence.adoc[CP Persistence] features, those persistent files will need to be removed after the cluster is shut down, as they contain references to the previous partition count. Once all member configurations are updated, and any persistent data structure files are removed, the cluster can be safely restarted.
diff --git a/docs/modules/cluster-performance/pages/pipelining.adoc b/docs/modules/cluster-performance/pages/pipelining.adoc
new file mode 100644
index 000000000..87f4b36ab
--- /dev/null
+++ b/docs/modules/cluster-performance/pages/pipelining.adoc
@@ -0,0 +1,61 @@
+[[pipelining]]
+= Pipelining
+
+Here are the essential tips:
+
+* If you can write the job as SQL, the query optimizer can take care of it
+* If you have to write a pipeline, it can be very hard to spot optimizations
+* Focus on early filtering and early depletion
+
+With pipelining, you can send multiple
+requests in parallel using a single thread and therefore increase throughput.
+As an example, suppose that the round-trip time for a request/response
+is 1 millisecond. If synchronous requests are used, for example `IMap.get()`, then the maximum throughput out of these requests from
+a single thread is 1/0.001 = 1000 operations/second. One way to solve this problem is to introduce multithreading to make
+the requests in parallel. For the same example, if we use two threads, then the maximum throughput doubles from 1000
+operations/second to 2000 operations/second.
+
+However, introducing threads for the sake of executing requests isn't always convenient and doesn't always lead to optimal
+performance; this is where pipelining can be used. Instead of using multiple threads to have concurrent invocations,
+you can use asynchronous method calls such as `IMap.getAsync()`. If you use two asynchronous calls from a single thread,
+then the maximum throughput is 2*(1/0.001) = 2000 operations/second. Therefore, to benefit from pipelining, asynchronous calls need to
+be made from a single thread. Pipelining is a convenience implementation to provide back pressure - that is, controlling
+the number of inflight operations - and provides a convenient way to wait for all the results.
+
+[source,java]
+----
+Pipelining<String> pipelining = new Pipelining<>(10);
+for (int k = 0; k < 100; k++) {
+    int key = random.nextInt(keyDomain);
+    pipelining.add(map.getAsync(key));
+}
+// wait for completion
+List<String> results = pipelining.results();
+----
+
+In the above example, we make 100 asynchronous `map.getAsync()` calls, but the maximum number of inflight calls is 10.
+
+By increasing the depth of the pipelining, throughput can be increased. Pipelining has its own back pressure; you do not
+need to enable back pressure on the client or member to have this feature on the pipelining.
+However, if you have many
+pipelines, you may still need to enable the client/member back pressure because it is possible to overwhelm the system
+with requests in that situation. See xref:back-pressure.adoc[Back Pressure] to learn how to enable it on the client or member.
+
+You can use pipelining on both clients and members. You do not need any special configuration; it works out of the box.
+
+Pipelining can be used for any asynchronous call. You can use it for IMap asynchronous get/put methods as well as for
+ICache, IAtomicLong, etc. It cannot be used as a transaction mechanism though. So you cannot make some calls, throw away the pipeline and expect that
+none of the requests are executed. If you want atomic behavior, see xref:transactions:providing-xa-transactions.adoc[Transactions] for more details.
+Pipelining is just a performance optimization, not a mechanism for atomic behavior.
+
+[CAUTION]
+.Deprecation Notice for Transactions
+====
+Transactions have been deprecated, and will be removed as of Hazelcast version 7.0. An improved version of this feature is under consideration. If you are already using transactions, get in touch and share your use case. Your feedback will help us to develop a solution that meets your needs.
+====
+
+Pipelines are cheap and should be replaced frequently because they accumulate results. It is fine to have a few hundred or
+even a few thousand calls being processed with the pipelining. However, all the responses to all requests are stored in the pipeline
+as long as the pipeline is referenced. So if you want to process a huge number of requests, then every few hundred or few
+thousand calls, wait for the pipelining results and just create a new instance.
+
+Note that pipelines are not thread-safe. They must be used by a single thread.
diff --git a/docs/modules/cluster-performance/pages/threading.adoc b/docs/modules/cluster-performance/pages/threading.adoc
new file mode 100644
index 000000000..00eb481a2
--- /dev/null
+++ b/docs/modules/cluster-performance/pages/threading.adoc
@@ -0,0 +1,338 @@
+= Threading
+:description: Your application server has its own threads. Hazelcast does not use these; it manages its own threads.
+{description}
+
+NOTE: For information on threading in Thread-Per-Core (TPC) environments, see xref:thread-per-core-tpc.adoc[].
+
+== I/O Threading
+
+Hazelcast uses a pool of threads for I/O. A single thread does not perform all the I/O.
+Instead, multiple threads perform the I/O. On each cluster member, the I/O threading is split into three types of I/O threads:
+
+* an I/O thread that handles accept requests
+* I/O threads that read data from other members/clients
+* I/O threads that write data to other members/clients
+
+You can configure the number of I/O threads using the `hazelcast.io.thread.count` system property.
+Its default value is `3`, which is applied to each member. If the default is used, in total there are 7 I/O threads:
+one accept I/O thread, three read I/O threads and three write I/O threads. Each I/O thread has
+its own `Selector` instance and waits on `Selector.select` if there is nothing to do.
+
+NOTE: You can also specify counts for input and output threads separately.
+There are `hazelcast.io.input.thread.count` and `hazelcast.io.output.thread.count` properties for this purpose.
+See the xref:ROOT:system-properties.adoc[System Properties appendix] for information about these properties and how to set them.
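+
+As an illustrative sketch, these system properties can also be set programmatically on the member
+configuration; the values below are arbitrary examples:
+
+[source,java]
+----
+// Use 4 read and 4 write I/O threads instead of the default 3/3.
+Config config = new Config();
+config.setProperty("hazelcast.io.input.thread.count", "4");
+config.setProperty("hazelcast.io.output.thread.count", "4");
+----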
+
+Hazelcast periodically scans the utilization of each I/O thread and
+can decide to migrate a connection to a new thread if
+the existing thread is servicing a disproportionate number of I/O events.
+You can customize the scanning interval by configuring the `hazelcast.io.balancer.interval.seconds` system property;
+its default interval is 20 seconds. You can disable the balancing process by setting this property to a negative value.
+
+In the case of a read I/O thread, when sufficient bytes for a packet have been received, the `Packet` object is created. This `Packet` object is
+then sent to the system where it is de-multiplexed. If the `Packet` header signals that it is an operation/response, the `Packet` is handed
+over to the operation service (see <<operation-threading, Operation Threading>>). If the `Packet` is an event, it is handed
+over to the event service (see <<event-threading, Event Threading>>).
+
+[[event-threading]]
+== Event Threading
+
+Hazelcast uses a shared event system to deal with components that rely on events, such as topic, collections, listeners and Near Cache.
+
+Each cluster member has an array of event threads and each thread has its own work queue. When an event is produced,
+either locally or remotely, an event thread is selected (depending on whether there is a message ordering) and the event is placed
+in the work queue for that event thread.
+
+You can set the following properties
+to alter the system's behavior:
+
+* `hazelcast.event.thread.count`: Number of event threads in this array. Its default value is 5.
+* `hazelcast.event.queue.capacity`: Capacity of the work queue. Its default value is 1000000.
+* `hazelcast.event.queue.timeout.millis`: Timeout for placing an item on the work queue in milliseconds. Its default value is 250 milliseconds.
+
+If you process a lot of events and have many cores, changing the value of the `hazelcast.event.thread.count` property to
+a higher value is good practice. This way, more events can be processed in parallel.
+
+Multiple components share the same event queues. If there are two topics, say A and B, for certain messages
+they may share the same queue(s) and hence the same event thread. If there are a lot of pending messages produced by A, then B needs to wait.
+Also, when processing a message from A takes a lot of time and the event thread is used for that, B suffers from this.
+That is why it is better to offload processing to a dedicated thread (pool) so that systems are better isolated.
+
+If the events are produced at a higher rate than they are consumed, the queue grows in size. To prevent overloading the system
+and running into an `OutOfMemoryException`, the queue is given a capacity of one million items. When the maximum capacity is reached, the items are
+dropped. This means that the event system is a 'best effort' system. There is no guarantee that you are going to get an
+event. Topic A might have a lot of pending messages and therefore B cannot receive messages because the queue
+has no capacity and messages for B are dropped.
+
+== IExecutor Threading
+
+Executor threading is straightforward. When a task is received to be executed on executor E, E has its
+own `ThreadPoolExecutor` instance and the work is placed in the work queue of this executor.
+Thus, executors are fully isolated, but still share the same underlying hardware - most importantly the CPUs.
+
+You can configure the IExecutor using the `ExecutorConfig` (programmatic configuration) or
+using `<executor-service>` (declarative configuration).
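+
+For example, a minimal programmatic sketch (the executor name and values are hypothetical):
+
+[source,java]
+----
+// Isolated executor with 8 threads and a bounded work queue.
+Config config = new Config();
+config.getExecutorConfig("my-executor")
+        .setPoolSize(8)
+        .setQueueCapacity(1000);
+----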
== IExecutor Threading

Executor threading is straightforward. When a task is submitted for execution on executor E, E has its
own `ThreadPoolExecutor` instance and the work is placed in the work queue of this executor.
Thus, executors are fully isolated, but they still share the same underlying hardware, most importantly the CPUs.

You can configure the IExecutor using `ExecutorConfig` (programmatic configuration) or
using the `executor-service` element (declarative configuration).
See also the xref:computing:executor-service.adoc#configuring-executor-service[Configuring Executor Service section].
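The following is a minimal programmatic sketch; the executor name and sizing values are chosen purely for illustration:

[source,java]
----
import com.hazelcast.config.Config;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.IExecutorService;

import java.io.Serializable;

public class ExecutorTuning {
    public static void main(String[] args) {
        Config config = new Config();
        config.getExecutorConfig("my-executor") // hypothetical executor name
              .setPoolSize(8)                   // threads in this executor's pool
              .setQueueCapacity(1000);          // bound this executor's work queue

        HazelcastInstance hz = Hazelcast.newHazelcastInstance(config);
        IExecutorService executor = hz.getExecutorService("my-executor");
        // Tasks submitted to a distributed executor must be serializable.
        executor.execute((Runnable & Serializable) () ->
                System.out.println("Running in my-executor"));
    }
}
----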
[[operation-threading]]
== Operation Threading

There are two types of operations:

* Operations that are aware of a certain partition, for example, `IMap.get(key)`
* Operations that are not partition-aware, for example, `IExecutorService.executeOnMember(command, member)`

Each of these operation types has a different threading model, as explained in the following sections.

=== Partition-aware Operations

To execute partition-aware operations, an array of operation threads is created.
The default size of this array is the number of cores, with a minimum value of `2`.
You can change this value using the `hazelcast.operation.thread.count` property.

Each operation thread has its own work queue and consumes messages from this work queue. If a partition-aware
operation needs to be scheduled, the right thread is found using the following formula:

`threadIndex = partitionId % partitionThreadCount`

After the `threadIndex` is determined, the operation is put in the work queue of that operation thread. This means the following:

* A single operation thread executes operations for multiple partitions;
if there are 271 partitions and 10 partition threads, then roughly every operation thread executes operations for 27 partitions.
* Each partition belongs to only one operation thread.
All operations for a partition are always handled by exactly the same operation thread.
* No concurrency control is needed to deal with partition-aware operations, because
once a partition-aware operation is put in the work queue of a partition-aware operation thread, only one thread is able to touch that partition.

Because of this threading strategy, there are two forms of false sharing you need to be aware of:

* False sharing of the partition: two completely independent data structures share the same partition.
For example, if there is a map `employees` and a map `orders`,
the method `employees.get("peter")` running on partition 25 may be blocked by
the method `orders.get(1234)` also running on partition 25.
If independent data structures share the same partition, a slow operation on one data structure can slow down the other data structures.
* False sharing of the partition-aware operation thread: each operation thread is responsible for executing
operations on a number of partitions. For example, *thread 1* could be responsible for partitions 0, 10, 20, and so on, and *thread 2* for partitions
1, 11, 21, and so on. If an operation for partition 1 takes a lot of time, it blocks the execution of an operation for partition
11, because both of them are mapped to the same operation thread.

You need to be careful with long-running operations, because you could starve other operations of a thread.
As a general rule, the partition thread should be released as soon as possible, because operations are not designed
to be long-running. That is why, for example, it is very dangerous to execute a long-running operation
using `AtomicReference.alter()` or `IMap.executeOnKey()`, because these operations block other operations from being executed.

Currently, there is no support for work stealing. Different partitions that map to the same thread may need to wait
until one of the partitions is finished, even though there are other free partition-aware operation threads available.

**Example:**

Take a cluster with three members. Two members hold 90 primary partitions each and one member holds 91 primary partitions (271 in total). Let's
say each member has a single CPU with four cores. By default, four operation threads are allocated on each member to serve its 90 or 91 partitions.
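To make the routing formula above concrete, the following standalone sketch computes the operation thread for a few sample partitions; the thread count is an illustrative value:

[source,java]
----
public class PartitionThreadRouting {
    public static void main(String[] args) {
        int partitionThreadCount = 4; // e.g., one operation thread per core

        // Every operation for a given partition always lands on the same thread.
        int[] samplePartitions = {0, 1, 4, 25, 29, 270};
        for (int partitionId : samplePartitions) {
            int threadIndex = partitionId % partitionThreadCount;
            System.out.printf("partition %d -> operation thread %d%n", partitionId, threadIndex);
        }
        // Partitions 25 and 29 both map to thread 1, so a slow operation on
        // one of them delays the other: the false sharing described above.
    }
}
----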
=== Non-Partition-aware Operations

To execute operations that are not partition-aware, for example, `IExecutorService.executeOnMember(command, member)`, generic operation
threads are used. When the Hazelcast instance is started, an array of operation threads is created. The size of this array
defaults to the number of cores divided by two, with a minimum value of `2`. It can be changed using the
`hazelcast.operation.generic.thread.count` property.

A non-partition-aware operation thread never executes an operation for a specific partition; only partition-aware
operation threads execute partition-aware operations.

Unlike the partition-aware operation threads, all the generic operation threads share the same work queue: `genericWorkQueue`.

If a non-partition-aware operation needs to be executed, it is placed in that work queue and any generic operation
thread can execute it. The big advantage is that you automatically get work balancing, since any generic operation
thread is allowed to pick up work from this queue.

The disadvantage is that this shared queue can be a point of contention. You may not see this contention in
production, since performance is dominated by I/O and the system does not run many non-partition-aware operations.

=== Priority Operations

In some cases, the system needs to run operations with a higher priority, for example, an important system operation.
To support priority operations, Hazelcast has the following features:

* For partition-aware operations: Each partition thread has its own work queue, and it also has a priority
work queue. The partition thread always checks the priority queue before it processes work from its normal work queue.
* For non-partition-aware operations: Next to the `genericWorkQueue`, there is also a `genericPriorityWorkQueue`. When a priority operation
needs to be run, it is put in the `genericPriorityWorkQueue`. Like the partition-aware operation threads, a generic
operation thread first checks the `genericPriorityWorkQueue` for work.

Because a worker thread blocks on its normal work queue (either partition-specific or generic), a priority operation placed only in
the priority queue might not be noticed. That is why Hazelcast always sends a 'kick the worker' operation that does nothing except
trigger the worker to wake up and check the priority queue.

=== Operation-response and Invocation-future

When an operation is invoked, a `Future` is returned. See the example code below.

[source,java]
----
GetOperation operation = new GetOperation( mapName, key );
Future future = operationService.invoke( operation );
future.get();
----

The calling side blocks its thread waiting for a reply. In this case, `GetOperation` is placed in the work queue for the partition of `key`, where
it is eventually executed. Upon execution, a response is returned and placed on the `genericWorkQueue`, where it is executed by a
generic operation thread. This thread signals the `future` and notifies the blocked thread that a response is available.
Hazelcast plans to expose this `future` to the outside world and to provide the ability to register a completion listener, so that you can make asynchronous calls.
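On the public API, a similar asynchronous pattern is already available today. The following is a minimal sketch using the public `IMap.getAsync` method; the map name and key are chosen for illustration:

[source,java]
----
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.map.IMap;

public class AsyncGetExample {
    public static void main(String[] args) {
        HazelcastInstance hz = Hazelcast.newHazelcastInstance();
        IMap<String, String> map = hz.getMap("example-map");
        map.put("key", "value");

        // Instead of blocking on get(), register a callback that runs
        // when the response arrives.
        map.getAsync("key")
           .thenAccept(value -> System.out.println("Got: " + value))
           .toCompletableFuture()
           .join(); // block only so this demo does not exit early

        hz.shutdown();
    }
}
----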
=== Local Calls

When a local partition-aware call is made, an operation is created and handed over to the work queue of the correct partition operation thread,
and a `future` is returned. When the calling thread calls `get` on that `future`, it acquires a lock and waits for the result
to become available. When a response is calculated, the `future` is looked up and the waiting thread is notified.

In the future, this will be optimized to reduce the number of expensive system calls, such as `lock.acquire()`/`notify()`, and the expensive
interaction with the operation queue. We will probably add support for a caller-runs mode, so that an operation is run directly on
the calling thread.

== CPU Thread Affinity

Hazelcast lets you pin certain categories of threads to particular CPUs, which gives you much better control
over latency and throughput. This configuration provides CPU thread affinity, where certain threads
have affinity for particular CPUs.

The following affinity configurations are available for a member:

```
-Dhazelcast.io.input.thread.affinity=1-3
-Dhazelcast.io.output.thread.affinity=3-5
-Dhazelcast.operation.thread.affinity=7-10,13
-Dhazelcast.operation.response.thread.affinity=15,16
```

The following affinity configurations are available for a client:

```
-Dhazelcast.client.io.input.thread.affinity=1-4
-Dhazelcast.client.io.output.thread.affinity=5-8
-Dhazelcast.client.response.thread.affinity=7-9
```

You can set the CPU thread affinity properties shown above only on the command line.

The values of these configuration properties can be defined as follows:

* **Individual CPUs**, for example `1,2,3`: There are going to be
three threads; the first thread runs on CPU 1, the second thread on CPU 2, and so on.
* **CPU ranges**, for example `1-3`: Shortcut syntax for `1,2,3`.
* **Group**, for example `[1-3]`: This configures three threads, and each of
these threads can run on CPU 1, 2, and 3.
* **Group with thread count**, for example `[1-3]:2`: This configures two
threads, and each of these two threads can run on CPU 1, 2, and 3.

You can also combine these forms, for example `1,2,[5-7],[10,12,16]:2`. This example configures seven threads in total:
one on CPU 1, one on CPU 2, three that can run on CPUs 5-7, and two that can run on CPUs 10, 12, and 16.

Note that the syntax for CPU thread affinity shown above not only determines
the mapping of CPUs to threads, it also determines the thread count.
If you use CPU thread affinity, for example `hazelcast.io.input.thread.affinity`,
then `hazelcast.io.input.thread.count` is ignored. See <<io-threading, I/O Threading>> for more
information about specifying explicit thread counts.

If you don't configure affinity for a category of threads, those threads can run on any CPU.

Let's look at an example. Assuming you have the `numactl` utility, run
the following command on your machine to see the mapping between the NUMA
nodes and threads:

```
numactl --hardware
```

An example output is shown below:

```
available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 20 21 22 23 24 25 26 27 28 29
node 0 size: 393090 MB
node 0 free: 372729 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 30 31 32 33 34 35 36 37 38 39
node 1 size: 393216 MB
node 1 free: 343296 MB
node distances:
node 0 1
  0: 10 21
  1: 21 10
```

If you want to configure 20 threads on NUMA node 0 and 20 threads on NUMA node 1,
and confine the threads to these NUMA nodes, you can use the following configuration:

```
-Dhazelcast.operation.thread.affinity=[0-9,20-29],[10-19,30-39]
```

Refer to https://en.wikipedia.org/wiki/Non-uniform_memory_access[Non-uniform memory access^]
for information about NUMA nodes.

== SlowOperationDetector

The `SlowOperationDetector` monitors the operation threads and collects information about all slow operations.
An `Operation` is a task executed by a generic or partition thread (see <<operation-threading, Operation Threading>>).
An operation is considered slow when it takes more computation time than the configured threshold.

The `SlowOperationDetector` stores the fully qualified class name of the operation and its stacktrace, as well as
the operation details, start time, and duration of each slow invocation. All collected data is available in
the xref:{page-latest-supported-mc}@management-center:monitor-imdg:monitor-members.adoc[Management Center].

The `SlowOperationDetector` is configured using the following system properties:

* `hazelcast.slow.operation.detector.enabled`
* `hazelcast.slow.operation.detector.log.purge.interval.seconds`
* `hazelcast.slow.operation.detector.log.retention.seconds`
* `hazelcast.slow.operation.detector.stacktrace.logging.enabled`
* `hazelcast.slow.operation.detector.threshold.millis`

See the xref:ROOT:system-properties.adoc[System Properties appendix] for explanations of these properties.

=== Logging of Slow Operations

Detected slow operations are logged as warnings in the Hazelcast log files:

```
WARN 2015-05-07 11:05:30,890 SlowOperationDetector: [127.0.0.1]:5701
 Slow operation detected: com.hazelcast.map.impl.operation.PutOperation
 Hint: You can enable the logging of stacktraces with the following config
 property: hazelcast.slow.operation.detector.stacktrace.logging.enabled
WARN 2015-05-07 11:05:30,891 SlowOperationDetector: [127.0.0.1]:5701
 Slow operation detected: com.hazelcast.map.impl.operation.PutOperation
 (2 invocations)
WARN 2015-05-07 11:05:30,892 SlowOperationDetector: [127.0.0.1]:5701
 Slow operation detected: com.hazelcast.map.impl.operation.PutOperation
 (3 invocations)
```

Stacktraces are always reported to the Management Center, but by default they are not printed to the log, to keep the log size small.
If logging of stacktraces is enabled, the full stacktrace is printed every 100 invocations.
All other invocations print a shortened version.

=== Purging of Slow Operation Logs

Since a Hazelcast cluster can run for a very long time, Hazelcast purges the slow operation logs periodically to prevent an `OutOfMemoryError`.
You can configure the purge interval and the retention time for each invocation.

The purging removes each invocation whose retention time is exceeded.
When all invocations are purged from a slow operation log, the log is deleted.
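For example, the detector could be tuned programmatically. The following is a sketch; the threshold and retention values are arbitrary illustrations:

[source,java]
----
import com.hazelcast.config.Config;
import com.hazelcast.core.Hazelcast;

public class SlowOperationDetectorTuning {
    public static void main(String[] args) {
        Config config = new Config();
        // Report operations that compute for longer than five seconds.
        config.setProperty("hazelcast.slow.operation.detector.threshold.millis", "5000");
        // Print stacktraces to the member log, not only to Management Center.
        config.setProperty("hazelcast.slow.operation.detector.stacktrace.logging.enabled", "true");
        // Keep slow-operation invocations for ten minutes before purging.
        config.setProperty("hazelcast.slow.operation.detector.log.retention.seconds", "600");
        Hazelcast.newHazelcastInstance(config);
    }
}
----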
[[int-response-queue]]
== Setting Response Thread

You can configure the response thread for internal operations on both the members and the clients.
Enabling backoff mode can, depending on the use case, give a 5-10% performance improvement;
however, it increases CPU utilization. To enable backoff mode, set the following property for Hazelcast cluster members:

```
-Dhazelcast.operation.responsequeue.idlestrategy=backoff
```

For Hazelcast clients, use the following property to enable backoff:

```
-Dhazelcast.client.responsequeue.idlestrategy=backoff
```
diff --git a/docs/modules/clusters/pages/legacy-ucd.adoc b/docs/modules/clusters/pages/legacy-ucd.adoc
index 71bf4fe7f..248d7957e 100644
--- a/docs/modules/clusters/pages/legacy-ucd.adoc
+++ b/docs/modules/clusters/pages/legacy-ucd.adoc
@@ -3,11 +3,11 @@
 {description}
-{ucn} is only available to {enterprise-product-name} Edition users.
+{ucn} is only available to {enterprise-product-name} users.
-Hazelcast recommends that {enterprise-product-name} Edition users migrate to {ucn} to benefit from the improved and extended capabilities that it offers.
+Hazelcast recommends that {enterprise-product-name} users migrate to {ucn} to benefit from the improved and extended capabilities that it offers.
-After the next major release, {open-source-product-name} users can either upgrade to {enterprise-product-name} Edition, or add their resources to the Hazelcast member class paths.
+After the next major release, {open-source-product-name} users can either upgrade to {enterprise-product-name}, or add their resources to the Hazelcast member class paths.
 For further information on migrating from {ucd} to {ucn}, see the xref:clusters:ucn-migrate-ucd.adoc[] topic.
diff --git a/docs/modules/clusters/pages/ucn-migrate-ucd.adoc b/docs/modules/clusters/pages/ucn-migrate-ucd.adoc
index 6466ae70f..8e062bc32 100644
--- a/docs/modules/clusters/pages/ucn-migrate-ucd.adoc
+++ b/docs/modules/clusters/pages/ucn-migrate-ucd.adoc
@@ -1,5 +1,5 @@
 = Migrate from {ucd}
-:description: {ucn} improves and extends the capabilities of {ucd} and is applicable to more use cases. Hazelcast recommends that {enterprise-product-name} Edition users deprecate their {ucd} implementations and migrate to using {ucn}.
+:description: {ucn} improves and extends the capabilities of {ucd} and is applicable to more use cases. Hazelcast recommends that {enterprise-product-name} users deprecate their {ucd} implementations and migrate to using {ucn}.
 :page-enterprise: true
 :page-beta: false
diff --git a/docs/modules/clusters/partials/ucn-migrate-tip.adoc b/docs/modules/clusters/partials/ucn-migrate-tip.adoc
index be173f1bd..88b7e6ec7 100644
--- a/docs/modules/clusters/partials/ucn-migrate-tip.adoc
+++ b/docs/modules/clusters/partials/ucn-migrate-tip.adoc
@@ -1 +1 @@
-CAUTION: {ucd} has been deprecated and will be removed in the next major version. To continue deploying your user code after this time, {open-source-product-name} users can either upgrade to {enterprise-product-name} Edition, or add their resources to the Hazelcast member class paths. Hazelcast recommends that {enterprise-product-name} users migrate their user code to use {ucn}. For further information on migrating from {ucd} to {ucn}, see the xref:clusters:ucn-migrate-ucd.adoc[] topic. \ No newline at end of file
+CAUTION: {ucd} has been deprecated and will be removed in the next major version.
To continue deploying your user code after this time, {open-source-product-name} users can either upgrade to {enterprise-product-name}, or add their resources to the Hazelcast member class paths. Hazelcast recommends that {enterprise-product-name} users migrate their user code to use {ucn}. For further information on migrating from {ucd} to {ucn}, see the xref:clusters:ucn-migrate-ucd.adoc[] topic. \ No newline at end of file diff --git a/docs/modules/computing/partials/nav.adoc b/docs/modules/computing/partials/nav.adoc index c99e2aecf..c7f307927 100644 --- a/docs/modules/computing/partials/nav.adoc +++ b/docs/modules/computing/partials/nav.adoc @@ -1,4 +1,5 @@ * Java Executor Service ** xref:computing:executor-service.adoc[] ** xref:computing:durable-executor-service.adoc[] -** xref:computing:scheduled-executor-service.adoc[] \ No newline at end of file +** xref:computing:scheduled-executor-service.adoc[] +** xref:computing:entry-processor.adoc[] \ No newline at end of file diff --git a/docs/modules/configuration/partials/nav.adoc b/docs/modules/configuration/partials/nav.adoc index 400c46b10..a26dc7d23 100644 --- a/docs/modules/configuration/partials/nav.adoc +++ b/docs/modules/configuration/partials/nav.adoc @@ -13,3 +13,4 @@ *** xref:configuration:dynamic-config-programmatic-api.adoc[Programmatic API] *** xref:configuration:dynamic-config-update-and-reload.adoc[Update and Reload with REST] *** xref:configuration:dynamic-config-persistence.adoc[Configuration Options] +** xref:phone-homes.adoc[] diff --git a/docs/modules/data-connections/pages/data-connections-configuration.adoc b/docs/modules/data-connections/pages/data-connections-configuration.adoc index 0caf6a1fa..1e37b1fc8 100644 --- a/docs/modules/data-connections/pages/data-connections-configuration.adoc +++ b/docs/modules/data-connections/pages/data-connections-configuration.adoc @@ -86,7 +86,7 @@ SQL:: Data connections created in SQL behave differently to those defined in members' configuration files or in Java. -- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name} Edition. +- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name}. - You can create or drop a data connection using SQL commands. To update a data connection, you need to drop and then recreate it. [source,sql] @@ -226,7 +226,7 @@ SQL:: -- Data connections created in SQL behave differently to those defined in members' configuration files or in Java. -- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name} Edition. +- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name}. - You can create or drop a data connection using SQL commands. To update a data connection, you need to drop and then recreate it. [source,sql] @@ -355,7 +355,7 @@ SQL:: -- Data connections created in SQL behave differently to those defined in members' configuration files or in Java. 
-- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name} Edition. +- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name}. - You can create or drop a data connection using SQL commands. To update a data connection, you need to drop and then recreate it. [source,SQL,subs="attributes+"] diff --git a/docs/modules/data-connections/pages/mongo-dc-configuration.adoc b/docs/modules/data-connections/pages/mongo-dc-configuration.adoc index 82e9d2aae..6dd21c265 100644 --- a/docs/modules/data-connections/pages/mongo-dc-configuration.adoc +++ b/docs/modules/data-connections/pages/mongo-dc-configuration.adoc @@ -100,7 +100,7 @@ SQL:: -- Data connections created in SQL behave differently from those defined in members' configuration files or in Java. -- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name} Edition. +- To retain SQL-defined data connections after a cluster restart, you must enable xref:storage:configuring-persistence.adoc#sql[SQL metadata persistence]. This feature is available in the {enterprise-product-name}. - You can create or drop a data connection using SQL commands. To update a data connection, you need to drop and then recreate it. [source,SQL] diff --git a/docs/modules/data-structures/pages/cpmap.adoc b/docs/modules/data-structures/pages/cpmap.adoc index 7794c82e5..fbafdc9c6 100644 --- a/docs/modules/data-structures/pages/cpmap.adoc +++ b/docs/modules/data-structures/pages/cpmap.adoc @@ -19,9 +19,9 @@ This reliability makes it ideal for storing sensitive information such as user b Due to ``CPMap``s distributed nature, above operations involve remote calls. This means that the calls might take longer to complete than non-distributed counterparts, such as ``HashMap``s. -NOTE: `CPMap` is only available in the {enterprise-product-name} Edition. +NOTE: `CPMap` is only available in the {enterprise-product-name}. Your license must include `ADVANCED_CP` to activate this feature and you must use the -{enterprise-product-name} Edition client. +{enterprise-product-name} client. There is no unsafe variant of `CPMap`, unlike other CP data structures. Therefore, CP must be xref:cp-subsystem:configuration.adoc#quickstart-configuration[enabled,window=_blank] before using `CPMap`. diff --git a/docs/modules/data-structures/partials/nav.adoc b/docs/modules/data-structures/partials/nav.adoc index ea09729b2..6f0dcf36d 100644 --- a/docs/modules/data-structures/partials/nav.adoc +++ b/docs/modules/data-structures/partials/nav.adoc @@ -1,5 +1,6 @@ * Distributed Data Structures ** xref:data-structures:distributed-data-structures.adoc[Overview] +** xref:cache:overview.adoc[] ** Map *** xref:data-structures:map.adoc[Overview] *** xref:data-structures:map-config.adoc[] diff --git a/docs/modules/deploy/pages/enterprise-licenses.adoc b/docs/modules/deploy/pages/enterprise-licenses.adoc index b6b83b839..975345c91 100644 --- a/docs/modules/deploy/pages/enterprise-licenses.adoc +++ b/docs/modules/deploy/pages/enterprise-licenses.adoc @@ -9,7 +9,7 @@ // tag::get-license[] Hazelcast {enterprise-product-name} requires a license key. 
You can get a
-30-day trial license from the link:https://hazelcast.com/get-started[Hazelcast website].
+30-day trial license from the link:https://hazelcast.com/get-started?utm_source=docs-website[Hazelcast website].

[[license-key-format]]License keys have the following format:

diff --git a/docs/modules/deploy/partials/nav.adoc b/docs/modules/deploy/partials/nav.adoc
index 1005f5ca1..e69de29bb 100644
--- a/docs/modules/deploy/partials/nav.adoc
+++ b/docs/modules/deploy/partials/nav.adoc
@@ -1,6 +0,0 @@
-* Public Cloud
-** xref:deploy:deploying-in-cloud.adoc[Overview]
-** xref:deploy:deploying-on-aws.adoc[]
-** xref:deploy:deploying-on-azure.adoc[]
-** xref:deploy:deploying-on-gcp.adoc[]
-* xref:deploy:deploying-with-docker.adoc[] \ No newline at end of file
diff --git a/docs/modules/getting-started/pages/blue-green.adoc b/docs/modules/getting-started/pages/blue-green.adoc
index 082c31ae6..18ea49ae9 100644
--- a/docs/modules/getting-started/pages/blue-green.adoc
+++ b/docs/modules/getting-started/pages/blue-green.adoc
@@ -51,9 +51,9 @@ Run the following command on the terminal:
 docker run \
    --network hazelcast-network \
    --rm \
-   -e HZ_NETWORK_PUBLICADDRESS=:5701 \
+   -e HZ_NETWORK_PUBLICADDRESS=:5701 \ <1>
    -e HZ_CLUSTERNAME=blue \
-   -e HZ_LICENSEKEY= \ <1>
+   -e HZ_LICENSEKEY= \ <2>
    -p 5701:5701 hazelcast/hazelcast-enterprise:{full-version}
 ----
 <1> Replace the `` placeholder with the IP address of your Docker host.
@@ -81,9 +81,9 @@ Start another local single-member cluster called `green`.
 docker run \
    --network hazelcast-network \
    --rm \
-   -e HZ_NETWORK_PUBLICADDRESS=:5702 \
+   -e HZ_NETWORK_PUBLICADDRESS=:5702 \ <1>
    -e HZ_CLUSTERNAME=green \
-   -e HZ_LICENSEKEY= \ <1>
+   -e HZ_LICENSEKEY= \ <2>
    -p 5702:5701 hazelcast/hazelcast-enterprise:{full-version}
 ----
 <1> Replace the `` placeholder with the IP address of your Docker host.
diff --git a/docs/modules/getting-started/pages/editions.adoc b/docs/modules/getting-started/pages/editions.adoc
index 8a7aa8bfc..f9841267c 100644
--- a/docs/modules/getting-started/pages/editions.adoc
+++ b/docs/modules/getting-started/pages/editions.adoc
@@ -1,16 +1,16 @@
 = Hazelcast Editions and Distributions
-:description: Hazelcast offers {enterprise-product-name} and {open-source-product-name} editions of its member software, each available as either a full or slim distribution.
+:description: Hazelcast offers {enterprise-product-name} and {open-source-product-name} versions of its software, each available as either a full or slim distribution.
 :page-aliases: before-you-begin.adoc

 {description}

-== {enterprise-product-name} Edition
+== {enterprise-product-name}

-Hazelcast {enterprise-product-name} edition is a commercial product that extends the {open-source-product-name} offering to provide you with many benefits, including the security suite, blue/green deployments for clients, and a xref:support#customer-support.adoc[Hazelcast Customer Support] subscription.
+Hazelcast {enterprise-product-name} is a commercial product that extends the {open-source-product-name} offering to provide you with many benefits, including the security suite, blue/green deployments for clients, and a xref:support.adoc#customer-support[Hazelcast Customer Support] subscription.

-TIP: In this documentation, topics related to an {enterprise-product-name} edition feature include an [.enterprise]*{enterprise-product-name}* label.
+TIP: In this documentation, topics related to an {enterprise-product-name} feature include an [.enterprise]*{enterprise-product-name}* label.
-The {enterprise-product-name} edition offers the following features, which are not available in the {open-source-product-name} edition:
+The {enterprise-product-name} offers the following features, which are not available in the {open-source-product-name}:

 * xref:security:overview.adoc[Security suite]
 * xref:wan:wan.adoc[]
@@ -27,16 +27,16 @@ The {enterprise-product-name} edition offers the following features, which are n
 NOTE: All pre-existing Hazelcast licenses, such as the Pro/Enterprise/Enterprise HD licenses, are valid for Hazelcast Platform.

-== {open-source-product-name} Edition
+== {open-source-product-name}

-Hazelcast {open-source-product-name} edition is free and covered by an Apache License, Version 2.0
+Hazelcast {open-source-product-name} is free and covered by an Apache License, Version 2.0
 and a https://hazelcast.com/hazelcast-community-license/[Hazelcast Community License].

 The permissions granted in these licenses allow you to do the following:

-* Use the {open-source-product-name} edition
-* Reproduce the {open-source-product-name} edition
-* Distribute the {open-source-product-name} edition with any open and/or closed source applications
+* Use the {open-source-product-name}
+* Reproduce the {open-source-product-name}
+* Distribute the {open-source-product-name} with any open and/or closed source applications

 == Full and Slim Distributions [[full-slim]]

 Other installation options offer only the full distribution.

 You can find more information on installing the Hazelcast editions in the following topics:

-* For the {enterprise-product-name} edition, see the xref:install-enterprise.adoc[] topic
-* For the {open-source-product-name} edition, see the xref:install-hazelcast.adoc[] topic
+* For the {enterprise-product-name}, see the xref:install-enterprise.adoc[] topic
+* For the {open-source-product-name}, see the xref:install-hazelcast.adoc[] topic

 === Full Distribution

diff --git a/docs/modules/getting-started/pages/install-enterprise.adoc b/docs/modules/getting-started/pages/install-enterprise.adoc
index 55248b502..1bd5ed31e 100644
--- a/docs/modules/getting-started/pages/install-enterprise.adoc
+++ b/docs/modules/getting-started/pages/install-enterprise.adoc
@@ -1,10 +1,10 @@
 = Installing Hazelcast {enterprise-product-name}
-:description: pass:q[To install Hazelcast Enterprise, you can use Docker, the binary, or Java. To unlock the Enterprise features, you must install your Enterprise license key.]
+:description: To install Hazelcast {enterprise-product-name}, you can use Docker, the binary, or Java. To unlock the {enterprise-product-name} features, you must install your {enterprise-product-name} license key.
 :page-aliases: deploy:using-enterprise-edition.adoc, get-started-enterprise.adoc

 {description}

-NOTE: These instructions apply only to the licensed {enterprise-product-name} edition, which provides additional features such as the security suite and blue/green client deployment. If you have the {open-source-product-name} edition, follow the instructions in the xref:install-hazelcast.adoc[] topic.
+NOTE: These instructions apply only to the licensed {enterprise-product-name}, which provides additional features such as the security suite and blue/green client deployment. If you have the {open-source-product-name}, follow the instructions in the xref:install-hazelcast.adoc[] topic.

 Both slim and full distributions are available. For further information on the available editions and distributions, see the xref:editions.adoc[] topic.
diff --git a/docs/modules/getting-started/pages/install-hazelcast.adoc b/docs/modules/getting-started/pages/install-hazelcast.adoc index b119574a9..ecbec0634 100644 --- a/docs/modules/getting-started/pages/install-hazelcast.adoc +++ b/docs/modules/getting-started/pages/install-hazelcast.adoc @@ -1,10 +1,10 @@ = Installing Hazelcast {open-source-product-name} -:description: pass:q[You can install the Open Source edition of Hazelcast, using the command line interface (CLI), Docker, the binary, or Java.] +:description: You can install the {open-source-product-name} of Hazelcast, using the command line interface (CLI), Docker, the binary, or Java. :page-aliases: installing-using-download-archives.adoc, deploy:running-in-modular-java.adoc {description} -NOTE: These instructions apply only to the {open-source-product-name} edition. If you have a license, which allows you to take advantage of the additional features offered by {enterprise-product-name} edition, follow the instructions in the xref:install-enterprise.adoc[] topic. +NOTE: These instructions apply only to the {open-source-product-name}. If you have a license, which allows you to take advantage of the additional features offered by {enterprise-product-name}, follow the instructions in the xref:install-enterprise.adoc[] topic. Both slim and full distributions are available. For further information on the available editions and distributions, see the xref:editions.adoc[] topic. diff --git a/docs/modules/getting-started/pages/persistence.adoc b/docs/modules/getting-started/pages/persistence.adoc index 80af5bec6..6dafb550d 100644 --- a/docs/modules/getting-started/pages/persistence.adoc +++ b/docs/modules/getting-started/pages/persistence.adoc @@ -41,12 +41,12 @@ You'll start the first member in a cluster called `hello-world`. Run the followi docker run \ --name first-member --network hazelcast-network \ --rm \ - -e HZ_NETWORK_PUBLICADDRESS=:5701 \ + -e HZ_NETWORK_PUBLICADDRESS=:5701 \ <1> -e HZ_CLUSTERNAME=hello-world \ - -e HZ_LICENSEKEY= \ <1> - -e HZ_PERSISTENCE_ENABLED=true \ <2> - -e HZ_MAP_MYDISTRIBUTEDMAP_DATAPERSISTENCE_ENABLED=true \ <3> - -v ~/persist:/opt/hazelcast/persistence \ <4> + -e HZ_LICENSEKEY= \ <2> + -e HZ_PERSISTENCE_ENABLED=true \ <3> + -e HZ_MAP_MYDISTRIBUTEDMAP_DATAPERSISTENCE_ENABLED=true \ <4> + -v ~/persist:/opt/hazelcast/persistence \ <5> -p 5701:5701 hazelcast/hazelcast-enterprise:{full-version} ---- <1> Replace the `` placeholder with the IP address of your Docker host. diff --git a/docs/modules/getting-started/pages/support.adoc b/docs/modules/getting-started/pages/support.adoc index ff1dbb6fe..e325f6fbb 100644 --- a/docs/modules/getting-started/pages/support.adoc +++ b/docs/modules/getting-started/pages/support.adoc @@ -67,3 +67,12 @@ systems specific information and details. Please consider the above for prompt help from the team and note that the more information is provided upfront the better. Lastly, be prompt in your communication with Hazelcast support - helps to ensure timely resolution of issues. + +== Developer Resources + +* Hazelcast source code can be found at https://github.com/hazelcast/hazelcast[GitHub/Hazelcast^]. +This is also where you can contribute and report issues. +* Hazelcast API can be found at https://docs.hazelcast.org/docs/latest/javadoc/[hazelcast.org/docs/Javadoc^]. +* Code samples can be downloaded from https://github.com/hazelcast/hazelcast-code-samples[GitHub/hazelcast-code-samples^]. +* More use cases and resources can be found at http://www.hazelcast.com[hazelcast.com^]. 
+* xref:getting-started:support.adoc[Support information]. \ No newline at end of file diff --git a/docs/modules/getting-started/pages/wan.adoc b/docs/modules/getting-started/pages/wan.adoc index ebe75e5c7..b6761200f 100644 --- a/docs/modules/getting-started/pages/wan.adoc +++ b/docs/modules/getting-started/pages/wan.adoc @@ -43,8 +43,8 @@ which means it does not need WAN Replication to be configured. Run the following ---- docker run \ --network hazelcast-network \ - -e HZ_NETWORK_PUBLICADDRESS=:5701 \ - -e HZ_LICENSEKEY= \ <1> + -e HZ_NETWORK_PUBLICADDRESS=:5701 \ <1> + -e HZ_LICENSEKEY= \ <2> -e HZ_CLUSTERNAME=london \ hazelcast/hazelcast-enterprise:{full-version} ---- diff --git a/docs/modules/getting-started/partials/nav.adoc b/docs/modules/getting-started/partials/nav.adoc index f21687629..139597f9c 100644 --- a/docs/modules/getting-started/partials/nav.adoc +++ b/docs/modules/getting-started/partials/nav.adoc @@ -1,15 +1,2 @@ -* Start a Local Cluster -** xref:getting-started:get-started-cli.adoc[CLI] -** xref:getting-started:get-started-docker.adoc[Docker] -** xref:getting-started:get-started-binary.adoc[Binary] -** xref:getting-started:get-started-java.adoc[Java Embedded] -* Explore {enterprise-product-name} Features -** xref:getting-started:enterprise-overview.adoc[] -** xref:getting-started:persistence.adoc[] -** xref:getting-started:authenticate-clients.adoc[] -** xref:getting-started:wan.adoc[] -** xref:getting-started:blue-green.adoc[] -* xref:getting-started:support.adoc[] -* xref:getting-started:resources.adoc[] -* xref:ROOT:glossary.adoc[] + diff --git a/docs/modules/integrate/partials/nav.adoc b/docs/modules/integrate/partials/nav.adoc index ddee698cc..54bcfe6f2 100644 --- a/docs/modules/integrate/partials/nav.adoc +++ b/docs/modules/integrate/partials/nav.adoc @@ -1,36 +1,5 @@ * Connector Guides -** xref:integrate:connectors.adoc[Overview] -** Files -// Files need an overview (options, what's available for SQL, what's available for Jet API) -*** xref:integrate:file-connector.adoc[] -*** xref:integrate:legacy-file-connector.adoc[] -** Kafka Connect Connectors -*** xref:integrate:kafka-connect-connectors.adoc[Source Connectors] -** Messaging System Connectors -*** xref:integrate:messaging-system-connectors.adoc[Overview] -*** xref:integrate:kafka-connector.adoc[] -*** xref:integrate:kinesis-connector.adoc[] -*** xref:integrate:jms-connector.adoc[] -*** xref:integrate:pulsar-connector.adoc[] -** Data Structure Connectors -// Need an overview (options, what's available for SQL, what's available for JetAPI) -*** xref:integrate:jcache-connector.adoc[] -*** xref:integrate:list-connector.adoc[] -*** xref:integrate:map-connector.adoc[] -*** xref:integrate:reliable-topic-connector.adoc[] -** Databases -*** xref:integrate:database-connectors.adoc[Overview] -*** xref:integrate:jdbc-connector.adoc[] -*** xref:integrate:cdc-connectors.adoc[] -*** xref:integrate:elasticsearch-connector.adoc[] -*** xref:integrate:mongodb-connector.adoc[] -*** xref:integrate:influxdb-connector.adoc[] -*** xref:integrate:redis-connector.adoc[] -** xref:integrate:test-connectors.adoc[] -** xref:integrate:socket-connector.adoc[] -** xref:integrate:http-connector.adoc[] -** xref:integrate:twitter-connector.adoc[] -** xref:integrate:custom-connectors.adoc[] + diff --git a/docs/modules/kubernetes/pages/deploying-in-kubernetes.adoc b/docs/modules/kubernetes/pages/deploying-in-kubernetes.adoc index fffeafcd8..a1bcedca1 100644 --- a/docs/modules/kubernetes/pages/deploying-in-kubernetes.adoc +++ 
b/docs/modules/kubernetes/pages/deploying-in-kubernetes.adoc @@ -82,7 +82,7 @@ Explore some step-by-step guides about how to use Hazelcast in Kubernetes. === Getting Started -* link:https://guides.hazelcast.org/kubernetes/[Deploy Hazelcast Cluster on Kubernetes] +* xref:tutorials:kubernetes.adoc[Deploy Hazelcast using Kubernetes] === Hazelcast Platform Operator diff --git a/docs/modules/kubernetes/partials/nav.adoc b/docs/modules/kubernetes/partials/nav.adoc index cfbb24bd0..5dd952a5b 100644 --- a/docs/modules/kubernetes/partials/nav.adoc +++ b/docs/modules/kubernetes/partials/nav.adoc @@ -1,5 +1,6 @@ * Kubernetes ** xref:kubernetes:deploying-in-kubernetes.adoc[Overview] +** xref:tutorials:kubernetes.adoc[Deploy Hazelcast using Kubernetes] ** xref:kubernetes:deploying-in-kubernetes.adoc#hazelcast-platform-operator-for-kubernetesopenshift[Hazelcast Platform Operator] ** xref:kubernetes:kubernetes-auto-discovery.adoc[Discovery and Partitioning] ** xref:kubernetes:kubernetes-persistence.adoc[Persistence] diff --git a/docs/modules/maintain-cluster/pages/enterprise-rest-api.adoc b/docs/modules/maintain-cluster/pages/enterprise-rest-api.adoc index 33d009914..beb5b8961 100644 --- a/docs/modules/maintain-cluster/pages/enterprise-rest-api.adoc +++ b/docs/modules/maintain-cluster/pages/enterprise-rest-api.adoc @@ -9,7 +9,7 @@ You can interact with the API using various tools and platforms such as cURL, RE In this section, we use cURL to provide the request examples. -NOTE: REST API is only available in the {enterprise-product-name} Edition. +NOTE: REST API is only available in the {enterprise-product-name}. Hazelcast also offered a Community Edition REST API, but this has been deprecated and will be removed as of Hazelcast version 7.0. For more info, see xref:rest-api.adoc[Community Edition REST API]. == Enabling REST API diff --git a/docs/modules/maintain-cluster/pages/rest-api.adoc b/docs/modules/maintain-cluster/pages/rest-api.adoc index b37535594..693bc9826 100644 --- a/docs/modules/maintain-cluster/pages/rest-api.adoc +++ b/docs/modules/maintain-cluster/pages/rest-api.adoc @@ -430,7 +430,7 @@ NOTE: Some of the REST calls listed below need their REST endpoint groups to be See the <> on how to enable them. Also note that the value of `$\{PASSWORD}` in the following calls is checked only if -the security is xref:security:enabling-jaas.adoc[enabled] in Hazelcast, i.e., if you have Hazelcast {enterprise-product-name} Edition. +the security is xref:security:enabling-jaas.adoc[enabled] in Hazelcast, i.e., if you have Hazelcast {enterprise-product-name}. If the security is disabled, the `$\{PASSWORD}` can be left empty. [cols="5a"] @@ -884,7 +884,7 @@ security: ---- ==== -Note that you should enable security in the configuration, i.e., you should have the Hazelcast {enterprise-product-name} edition. +Note that you should enable security in the configuration, i.e., you should have the Hazelcast {enterprise-product-name}. 
Assuming we have the above authentication configuration, the following is a REST call for the `/hazelcast/rest/management/cluster/state` endpoint, which includes the username and password as call parameters: ``` diff --git a/docs/modules/maintain-cluster/pages/shutdown.adoc b/docs/modules/maintain-cluster/pages/shutdown.adoc index 6a45c9d69..216cd168a 100644 --- a/docs/modules/maintain-cluster/pages/shutdown.adoc +++ b/docs/modules/maintain-cluster/pages/shutdown.adoc @@ -14,10 +14,10 @@ CLI:: -- NOTE: To use the CLI to shut down a cluster, you must first xref:clients:rest.adoc[enable the REST API]. -```bash +[source,bash] +---- bin/hz-cluster-admin -a
-c -o shutdown -``` --- +---- REST API:: + -- @@ -30,15 +30,19 @@ NOTE: To use the REST API to shut down a cluster, you must first xref:clients:re curl --data "${CLUSTERNAME}&${PASSWORD}" http://127.0.0.1:${PORT}/hazelcast/rest/management/cluster/clusterShutdown ---- -- + Java:: + -- -```java +[source,java] +---- HazelcastInstance.getCluster().shutdown() -``` +---- -- + Management Center:: + + -- Use Hazelcast Management Center to xref:{page-latest-supported-mc}@management-center:monitor-imdg:cluster-administration.adoc#cluster-state[shut down your whole Hazelcast cluster]. diff --git a/docs/modules/maintain-cluster/partials/nav.adoc b/docs/modules/maintain-cluster/partials/nav.adoc index fdc4602ef..7b47dd110 100644 --- a/docs/modules/maintain-cluster/partials/nav.adoc +++ b/docs/modules/maintain-cluster/partials/nav.adoc @@ -5,6 +5,4 @@ ** xref:maintain-cluster:member-attributes.adoc[Filtering Members with Attributes] ** xref:maintain-cluster:lite-members.adoc[Lite Members] ** xref:maintain-cluster:shutdown.adoc[Shutting Down] -** xref:maintain-cluster:enterprise-rest-api.adoc[REST API] -*** xref:maintain-cluster:rest-api.adoc[Community Edition REST API] ** xref:management:cluster-utilities.adoc[Cluster Utilities] diff --git a/docs/modules/migrate/pages/data-migration-tool.adoc b/docs/modules/migrate/pages/data-migration-tool.adoc index 80eb90663..7dbead9ee 100644 --- a/docs/modules/migrate/pages/data-migration-tool.adoc +++ b/docs/modules/migrate/pages/data-migration-tool.adoc @@ -1,5 +1,5 @@ = Using the Data Migration Tool -:description: You can use the Data Migration Tool (DMT) to migrate your data from version 4.x or 5.x {open-source-product-name} and {enterprise-product-name} Edition Hazelcast clusters when upgrading to 5.3.x or later versions of {enterprise-product-name} Edition, or moving to the latest {hazelcast-cloud} release. The DMT can also be used for infrastructure consolidation or separation with selective migration of application data between clusters. +:description: You can use the Data Migration Tool (DMT) to migrate your data from version 4.x or 5.x {open-source-product-name} and {enterprise-product-name} Hazelcast clusters when upgrading to 5.3.x or later versions of {enterprise-product-name}, or moving to the latest {hazelcast-cloud} release. The DMT can also be used for infrastructure consolidation or separation with selective migration of application data between clusters. {description} @@ -7,9 +7,9 @@ NOTE: The DMT migrates your data for maps and replicated maps only. Replicated m The DMT is typically used in the following situations: -* When migrating from an {open-source-product-name} cluster to an {enterprise-product-name} Edition cluster -* When migrating from an earlier version of {enterprise-product-name} Edition to a newer version. Such a migration can move directly between specified versions, even if several minor versions exist between them -* When migrating from an on-premise cluster to a self-managed {enterprise-product-name} Edition cluster in the cloud +* When migrating from an {open-source-product-name} cluster to an {enterprise-product-name} cluster +* When migrating from an earlier version of {enterprise-product-name} to a newer version. 
Such a migration can move directly between specified versions, even if several minor versions exist between them +* When migrating from an on-premise cluster to a self-managed {enterprise-product-name} cluster in the cloud * When migrating from an on-premise cluster to a {hazelcast-cloud} cluster * When you want to migrate specific application data from one cluster to another due to infrastructure changes @@ -59,7 +59,7 @@ Ensure that you have installed the following: When using the DMT, bear the following in mind: * You can run only one migration at a time -* The target cluster must be on 5.3.x {enterprise-product-name} Edition or the latest {hazelcast-cloud} release +* The target cluster must be on 5.3.x {enterprise-product-name} or the latest {hazelcast-cloud} release NOTE: {hazelcast-cloud} Trial and {hazelcast-cloud} Standard have a limit of 14GB of primary data. If you require more, you must use {hazelcast-cloud} Dedicated. For further information on the available {hazelcast-cloud} editions, refer to the https://docs.hazelcast.com/cloud/overview[Hazelcast {hazelcast-cloud}^] documentation. @@ -106,7 +106,7 @@ The clusters work to migrate your data as illustrated below: image::ROOT:dmt_diagram.png[DMT Clusters] -==== Limited Migration Cluster License +=== Limited Migration Cluster License A 10-node limited license is included for use with your migration cluster. @@ -192,6 +192,7 @@ for i in {1..1000}; do clc -c source.yaml map --name my-map set key-$i value-$i Windows:: + +-- [source,shell] ---- for /l %x in (1, 1, 1000) do clc -c source.yaml map --name my-map set key-%x value-%x --quiet @@ -231,7 +232,7 @@ NOTE: If you have multiple data structures, use a new line for each map name. Ensure that the target cluster is running on one of the following: -* {enterprise-product-name} Edition version 5.3.2 or later +* {enterprise-product-name} version 5.3.2 or later * {hazelcast-cloud} ==== Update the Target Configuration diff --git a/docs/modules/migrate/pages/upgrading-from-imdg-3.adoc b/docs/modules/migrate/pages/upgrading-from-imdg-3.adoc index 353d6369c..ade142190 100644 --- a/docs/modules/migrate/pages/upgrading-from-imdg-3.adoc +++ b/docs/modules/migrate/pages/upgrading-from-imdg-3.adoc @@ -85,7 +85,7 @@ for your specific situation. If you are doing lots of queries or other tasks which are CPU-bound, e.g., aggregations, you probably want to have as many cores available to partition operations as possible. -See xref:cluster-performance:best-practices.adoc#threading-model[Threading Model] for more information on Hazelcast's threading model. +See xref:cluster-performance:threading.adoc[Threading Model] for more information on Hazelcast's threading model. 
== Optimizing for Single Threaded Usages diff --git a/docs/modules/performance/partials/performance-nav.adoc b/docs/modules/performance/partials/performance-nav.adoc index 4d0bb732e..e69de29bb 100644 --- a/docs/modules/performance/partials/performance-nav.adoc +++ b/docs/modules/performance/partials/performance-nav.adoc @@ -1,11 +0,0 @@ -* xref:performance:performance.adoc[] -** xref:performance:pipelining.adoc[] -** xref:performance:data-affinity.adoc[] -** xref:performance:cpu-thread-affinity.adoc[] -** xref:performance:running-on-ec2.adoc[] -** xref:performance:back-pressure.adoc[] -** xref:performance:threading-model.adoc[] -** xref:performance:slowoperationdetector.adoc[] -** xref:performance:near-cache.adoc[] -** xref:performance:caching-deserialized-values.adoc[] -** xref:performance:best-practices.adoc[] \ No newline at end of file diff --git a/docs/modules/pipelines/pages/cdc.adoc b/docs/modules/pipelines/pages/cdc.adoc index 375e22486..a30649095 100644 --- a/docs/modules/pipelines/pages/cdc.adoc +++ b/docs/modules/pipelines/pages/cdc.adoc @@ -157,7 +157,7 @@ You should see the following jars: * hazelcast-jet-cdc-mysql-{full-version}.jar * hazelcast-jet-cdc-postgres-{full-version}.jar + -WARNING: If you have Hazelcast {enterprise-product-name} Edition, you need to manually download the MySQL CDC plugin from Hazelcast's Maven https://repo1.maven.org/maven2/com/hazelcast/jet/hazelcast-jet-cdc-mysql/{full-version}/hazelcast-jet-cdc-mysql-{full-version}-jar-with-dependencies.jar[repository] and then copy it to the `lib/` directory. +WARNING: If you have Hazelcast {enterprise-product-name}, you need to manually download the MySQL CDC plugin from Hazelcast's Maven https://repo1.maven.org/maven2/com/hazelcast/jet/hazelcast-jet-cdc-mysql/{full-version}/hazelcast-jet-cdc-mysql-{full-version}-jar-with-dependencies.jar[repository] and then copy it to the `lib/` directory. . Start Hazelcast. + diff --git a/docs/modules/pipelines/pages/job-security.adoc b/docs/modules/pipelines/pages/job-security.adoc index 169fd35c3..dd5836a28 100644 --- a/docs/modules/pipelines/pages/job-security.adoc +++ b/docs/modules/pipelines/pages/job-security.adoc @@ -11,7 +11,7 @@ To secure your cluster against malicious jobs, you have the following options: NOTE: You cannot run SQL queries when the Jet engine is disabled. - <> [.enterprise]*{enterprise-product-name}*. + -WARNING: The {open-source-product-name} edition of Hazelcast does not include security settings. To secure your Hazelcast members against clients misusing jobs, you can only <> or <>. +WARNING: The {open-source-product-name} of Hazelcast does not include security settings. To secure your Hazelcast members against clients misusing jobs, you can only <> or <>. == Disabling Code Uploads diff --git a/docs/modules/plugins/pages/hazelcast-plugins.adoc b/docs/modules/plugins/pages/hazelcast-plugins.adoc index 30d99810b..43767843b 100644 --- a/docs/modules/plugins/pages/hazelcast-plugins.adoc +++ b/docs/modules/plugins/pages/hazelcast-plugins.adoc @@ -1,4 +1,9 @@ = Hazelcast Plugins [[hazelcast-plugins]] -This chapter describes the plugins using which you can extend Hazelcast's functionalities. 
+There are a number of Hazelcast and community plugins available for extending Hazelcast functionality and integrating with other products and services:
+
+* xref:plugins:cloud-discovery.adoc[]
+* xref:plugins:web-session-replication.adoc[]
+* xref:plugins:framework-integration.adoc[]
+* xref:plugins:other-integrations.adoc[]
diff --git a/docs/modules/query/partials/nav.adoc b/docs/modules/query/partials/nav.adoc
index 8cb880d77..8b1378917 100644
--- a/docs/modules/query/partials/nav.adoc
+++ b/docs/modules/query/partials/nav.adoc
@@ -1 +1 @@
-* xref:query:predicate-overview.adoc[Predicates API]
+
diff --git a/docs/modules/release-notes/pages/5-4-0.adoc b/docs/modules/release-notes/pages/5-4-0.adoc
new file mode 100644
index 000000000..357819fe0
--- /dev/null
+++ b/docs/modules/release-notes/pages/5-4-0.adoc
@@ -0,0 +1,336 @@
= 5.4.0 Release Notes

CAUTION: Starting with this release of Hazelcast Platform, the minimum supported Java version is 17.

== New Features

* [.enterprise]*Enterprise* **User Code Namespaces**: Enable deployment and redeployment of your custom classes. See the xref:clusters:user-code-namespaces.adoc[User Code Namespaces documentation].
* [.enterprise]*Enterprise* **Thread-Per-Core (TPC)**: TPC is now generally available. You can enable this feature on the clients and cluster members for improved performance. See the xref:cluster-performance:thread-per-core-tpc.adoc[Thread-Per-Core (TPC) documentation].
* [.enterprise]*Enterprise* **CPMap**: Added CPMap as a minimal key-value CP data structure. See xref:data-structures:cpmap.adoc[CPMap documentation].
https://github.com/hazelcast/hazelcast/pull/25802[#25802]

== Breaking Changes

* The `MergingValue` interface within the SPI package now requires the `getDeserializedValue()` method to be defined within implementations, replacing the `getValue()` definition.
https://github.com/hazelcast/hazelcast/pull/25942[#25942]
* Moved the MongoDB connector to the extensions module; that is, its classes and related dependencies were relocated.
If you are using Maven to install the connector, you must add `jar-with-dependencies` to your `pom.xml`.
Also removed the permissions for the MongoDB connector.
https://github.com/hazelcast/hazelcast/pull/25744[#25744], https://github.com/hazelcast/hazelcast/pull/25701[#25701]
* Method names used as parameters in https://docs.hazelcast.org/docs/latest/javadoc/com/hazelcast/security/SecurityInterceptor.html[`SecurityInterceptor`] checks were reviewed and unified into a single place: the class `com.hazelcast.security.SecurityInterceptorConstants`. Some client messages have the method name changed to reflect their purpose better. Some client messages are newly covered by `SecurityInterceptor` checks now.
+
https://github.com/hazelcast/hazelcast/pull/25020[#25020]
* Renamed the service port for Hazelcast clusters deployed in Kubernetes environments to `hazelcast`.
The previous name, `hazelcast-service-port`, caused member auto-discovery for embedded deployments to fail.
https://github.com/hazelcast/hazelcast/pull/24834[#24834], https://github.com/hazelcast/hazelcast/issues/24705[#24705], https://github.com/hazelcast/hazelcast/issues/24688[#24688]
* Fixed an issue where Hazelcast was not failing fast when a member was started with a blank public address. The fix introduces a configuration validation that might break existing, but invalid, configurations.
+https://github.com/hazelcast/hazelcast/pull/24729[#24729] + +== Enhancements + +=== SQL/Jet Engine + +* Removed the beta annotations from the data connection classes. +https://github.com/hazelcast/hazelcast/pull/26000[#26000] +* Replaced the user-defined types (UDTs) feature flag with the cyclic UDT feature flag, to prevent querying such type data. +https://github.com/hazelcast/hazelcast/pull/25977[#25977] +* Added support for loading and storing a single column as the value using the `GenericMapStore`. +https://github.com/hazelcast/hazelcast/pull/25878[#25878] +* Each Jet engine job was creating a client for connecting to the remote cluster, resulting in an excessive number of clients. This has been fixed by introducing a single data connection, which can be reused for all job instances. +https://github.com/hazelcast/hazelcast/pull/25754[#25754], https://github.com/hazelcast/hazelcast/pull/25731[#25731] +* Added support for resolving fields from Avro schemas in Kafka mappings. +https://github.com/hazelcast/hazelcast/pull/25935[#25935] +* Changed the exception type from `CancellationException` to `CancellationByUserException` when the user cancels a job before it is initialized. +https://github.com/hazelcast/hazelcast/pull/25383[#25383] +* Added nested field support for Avro serialization format. +https://github.com/hazelcast/hazelcast/pull/25269[#25269] +* Removed the redundant sort and merge operations in sorted index scans, for the computations where the index order is not needed, for example, aggregations. +https://github.com/hazelcast/hazelcast/pull/25180[#25180] +* Updated the data comparator to improve the performance of sorted index operations. +https://github.com/hazelcast/hazelcast/pull/25196[#25196] +* Added support for partition pruning for the `__key` filters. +https://github.com/hazelcast/hazelcast/pull/25105[25105] +* Added support for partitioned edges in Jet engine's partition pruning. +https://github.com/hazelcast/hazelcast/pull/25062[#25062] +* Added a new mechanism to compute the required partitions to be scanned, if attribute partitioning strategy is applied. +https://github.com/hazelcast/hazelcast/pull/25006[#25006] +* Added the condition type to the `EXPLAIN PLAN` statement outputs for all physical joins. +https://github.com/hazelcast/hazelcast/pull/24899[#24899] +* Added support for nullable types when reading Avro files. +https://github.com/hazelcast/hazelcast/pull/24840[#24840] +* Added the ability to pass parameters for JDBC configuration, such as the fetch size for large tables. +https://github.com/hazelcast/hazelcast/pull/24835[#24835] +* Added support for partition pruning for SQL queries that have trivial filter predicates. +https://github.com/hazelcast/hazelcast/pull/24813[#24813] +* Reflected the https://blogs.oracle.com/javamagazine/post/transition-from-java-ee-to-jakarta-ee[change^] of `javax.jms` to `jakarta.jms` in Hazelcast distributions. +https://github.com/hazelcast/hazelcast/pull/24804[#24804] +* Added support for member pruning for Platform jobs to optimize a job's performance by picking up only the members required for the job. +https://github.com/hazelcast/hazelcast/pull/24689[#24689] +* Added the `stream()` method to the SQL service to return the stream of result rows. +https://github.com/hazelcast/hazelcast/pull/24525[#24525] +* Introduced a new configuration object to control the exact initial partition offsets when consuming records from Kafka via the Jet engine. 
+https://github.com/hazelcast/hazelcast/pull/21546[#21546]
+* Added the new user code module (`UserCodeTransforms`) to run user code in containerized environments. Currently, the module can be used with the Jet engine to create mappings with the user code. #6464
+* Added a new UDP socket connector processor (`StreamUDPSocketP`) which can be used as a streaming source.
+It accepts a transform function to be applied to the body of the datagram packet before passing it downstream. #6161
+* Aligned the behavior of the `hashCode()` method of `KeyedWindowResult` with that of `Map.Entry`. #697
+* Boxing of partitionId is now avoided in the `getPartitionAssignment()` method when partition pruning is not used. #486
+* Added the ability to limit the creation of objects through reflection. #296
+* Added the ability to use a reusable processor meta supplier for `forceTotalParallelismOne()` on random members without permissions. #227
+* Added a comparator for the High-Density Memory Store's index entries. Previously, on-heap entry comparators were used, which caused incorrect query outputs. #52
+
+=== Connectors
+
+* Implemented a new SQL mapping option to define the preferred local parallelism for connectors that support this configuration. Currently, only the Kafka connector supports this.
+See xref:sql:mapping-to-kafka.adoc#creating-a-kafka-mapping[Creating a Kafka Mapping] for example configurations.
+https://github.com/hazelcast/hazelcast/pull/26194[#26194]
+* Removed the beta annotations from the MongoDB classes.
+https://github.com/hazelcast/hazelcast/pull/25743[#25743]
+* Added TLS support for MongoDB data connections.
+https://github.com/hazelcast/hazelcast/pull/25301[#25301]
+* Added Oracle database support to the JDBC SQL connector.
+https://github.com/hazelcast/hazelcast/pull/25255[#25255]
+* Added support for inline Avro schemas for Kafka mappings.
+https://github.com/hazelcast/hazelcast/pull/25207[#25207]
+* Added support for `DataSerializable` serialization to the Mongo connector.
+https://github.com/hazelcast/hazelcast/pull/25197[#25197]
+* The check for existing resources in the Mongo connector is now done only once; previously, it was performed on every processor.
+https://github.com/hazelcast/hazelcast/pull/24953[#24953]
+* Hazelcast JDBC connector now supports Microsoft SQL Server.
+https://github.com/hazelcast/hazelcast/pull/25071[#25071]
+* Added the ability to configure the pool size of a MongoDB data connection. See xref:sql:mapping-to-mongo.adoc#creating-a-mongodb-mapping[Creating a MongoDB Mapping].
+https://github.com/hazelcast/hazelcast/pull/25027[#25027]
+
+=== Data Structures
+
+* Added a check for negative positions on the collections' `getItemAtPositionOrNull()` method.
+https://github.com/hazelcast/hazelcast/pull/25672[#25672]
+* Introduced a cluster state check to improve the removal of expired map/cache entries from the cluster.
+The removal operation is no longer executed if the cluster is in passive state.
+https://github.com/hazelcast/hazelcast/pull/24808[#24808]
+* Added the `IMap.localValues()` and `IMap.localValues(Predicate)` methods for faster access to locally owned values in maps.
+https://github.com/hazelcast/hazelcast/pull/24673[#24673]
+* Added the `deleteAsync()` method for maps to asynchronously remove a provided map entry key.
+https://github.com/hazelcast/hazelcast/pull/23509[#23509]
+* Fixed the Javadoc for caches where it was incorrectly referring to statistics instead of management, for the `setManagementEnabled()` method.
+https://github.com/hazelcast/hazelcast/pull/22575[#22575] +* Implemented RAFT lifecycle interfaces for CPMap. #6800 +* Added the `getCPObjectInfos()` method to create a snapshot of all existing CP objects for a given service on the group leader. #615 +* Added the `getCPGroupIds()` method to the CP Subsystem API to allow listing all data structures within a CP group. #591 +* The "last update time" record of the map entries recovered from the disk persistence is not used anymore when calculating the entries' expiration times. #233 +* [.enterprise]*Enterprise* Added the ability to check whether the `ADVANCED_CP` feature is present in the Hazelcast Platform Enterprise license, to enable CPMap. #157 + +=== [.enterprise]*Enterprise* WAN Replication + +* Improved dead connection handling for WAN replication static IP discovery, by introducing health check to the discovery strategy. +https://github.com/hazelcast/hazelcast/pull/25364[#25364] +* Added support for the dynamic adding of WAN replication configurations using Java API. +https://github.com/hazelcast/hazelcast/pull/25118[#25118] +* Added support for evicting map and cache entries through WAN replication by introducing two new properties. When enabled, WAN replication +events are fired when values are evicted from the map and cache objects. See xref:wan:tuning.adoc#replicating-imap-icache-evictions[Replicating `IMap`/`ICache` Evictions]. +https://github.com/hazelcast/hazelcast/pull/24941[#24941] + +=== [.enterprise]*Enterprise* Security + +* Added the ability to check map permissions before suggesting a schema. If a client has permission to read a map, then a suggestion with schema is sent. Otherwise a warning that no +permissions to read maps have been set is generated. +https://github.com/hazelcast/hazelcast/pull/26058[#26058] +* Updated permission checks in `CacheCreateConfig` and `GetSemaphoreType` tasks. +https://github.com/hazelcast/hazelcast/pull/25529[#25529] +* Improved the permission checks in the file connectors by adding a method that returns the permissions required to resolve field names. +https://github.com/hazelcast/hazelcast/pull/25348[#25348] +* Added support for permission subtraction (deny permissions) in client connections. See xref:security:native-client-security.adoc#deny-permissions[Deny Permissions]. +https://github.com/hazelcast/hazelcast/pull/25154[#25154] +* Added the boolean `forceCertValidation` property to the security configuration to initiate a remote certificate validity check. #6235 + +=== Storage + +* Improved the hit/miss cache statistics counter performance. +https://github.com/hazelcast/hazelcast/pull/25146[#25146] +* [.enterprise]*Enterprise* Tiered Store can now be used with map loaders. +https://github.com/hazelcast/hazelcast/pull/24827[#24827] +* Added the ability to configure Tiered Store backed maps dynamically. #670 +* [.enterprise]*Enterprise* Added the ability to persist namespaces for Hot Restart. #402 + +=== Networking + +* Added the ability to evaluate priorities while picking member addresses; when interfaces are not configured, the first possible IP address is no longer used. Instead, all addresses are evaluated and the one with the highest priority (IPv6 address) is selected. +https://github.com/hazelcast/hazelcast/pull/25305[#25305] +* Added the `demoteLocalDataMember()` method to convert members holding data to lite members, enabling a cluster to be alive while there is no data in it. 
+https://github.com/hazelcast/hazelcast/pull/24617[#24617]
+* Reduced the time it takes for clients to join the cluster; clients can now join immediately, because migrations are internally delayed and run asynchronously.
+https://github.com/hazelcast/hazelcast/pull/17582[#17582]
+
+=== Serialization
+
+* Added the ability to use a consistent serialization service for `ByKey` plans.
+https://github.com/hazelcast/hazelcast/pull/25631[#25631]
+* The serialization service is no longer created for light jobs, as it created overhead and generated garbage. #235, #449
+
+=== Cloud
+
+* Enhanced the warning message shown in cases of incorrect configuration when deploying a Hazelcast client on Amazon EKS.
+https://github.com/hazelcast/hazelcast/pull/25568[#25568]
+* Added the ability to retry DNS resolutions for the Kubernetes discovery plugin. #445
+
+=== Metrics and Logs
+
+* Disabled the log4j2 shutdown hook for cleaner shutdown logs after a Hazelcast Platform cluster deployed on Kubernetes is shut down.
+https://github.com/hazelcast/hazelcast/pull/26006[#26006]
+* Enabled faster execution times and more efficient garbage collection by making method probes use `MethodHandle` instead of reflection.
+https://github.com/hazelcast/hazelcast/pull/25279[#25279]
+* Improved the naming convention for CP Session, Lock, and Persistence metrics.
+https://github.com/hazelcast/hazelcast/pull/24843[#24843], https://github.com/hazelcast/hazelcast/pull/24836[#24836]
+* Added `status` and `userCancelled` flags to job metrics.
+https://github.com/hazelcast/hazelcast/pull/24716[#24716]
+* Added the `size` and `sizeBytes` metrics for CPMap. See xref:ROOT:list-of-metrics.adoc#cp-subsystem-metrics[CP Subsystem Metrics]. #6807
+* Added the `connectionHealth` and `failedTransmitCount` metrics to WAN Replication. See xref:ROOT:list-of-metrics.adoc#wan-replication-metrics[WAN Replication Metrics]. #848
+* Added metrics for the User Code Namespaces feature and set the prefix for these metrics as `ucn`. #745, #625
+* Removed the stack trace for WAN connection exceptions, since its content was the same as the exception log itself. #578
+* Added the ability to collect job execution metrics only from the members which run the job. #194
+
+=== Events and Listeners
+
+* Added the `onCancel()` method to the reliable message listener to trigger a notification when the listener is cancelled for any reason. #286
+
+=== REST API
+
+* Added the new `RestConfig` tag under the server `Config`; it allows configuration of the new REST API server. #508
+* Added the health check endpoints for the new REST API; these include the state of the members and cluster, and the member count. #192
+
+=== Distribution
+
+* Improved the binary scripts of Hazelcast Platform for Windows operating systems.
+** `common.bat` has been updated to not include excessive spaces in parameters.
+** `hz-cli.bat` and `hz-start.bat` have been updated to reference the `common.bat` script with correct paths.
+** `hz-start.bat` has been updated to remove double quote expansion for `CLASSPATH`.
++
+#165
+* Updated the versions of the following dependencies:
+** Snappy to 1.1.10.5
+** Netty to 4.1.100.Final
+** Jackson to 2.14.2
+** Avro to 1.11.3
++
+https://github.com/hazelcast/hazelcast/pull/24863[#25607],
+https://github.com/hazelcast/hazelcast/pull/25555[#25555],
+https://github.com/hazelcast/hazelcast/pull/25576[#25576],
+https://github.com/hazelcast/hazelcast/issues/22407[#22407]
+* Upgraded the Hazelcast Platform `pom.xml` to use JDK 17, as this is now the minimum supported Java version. #436
+* Updated the copyright year to 2024 in the codebase. #396
+
+=== [.enterprise]*Enterprise* Licensing
+
+* License keys are now masked in the license expiration notifications.
+https://github.com/hazelcast/hazelcast/pull/24800[#24800]
+
+=== API Documentation
+
+* Expanded the `PartitionAware` interface description to explain the requirements for calculating the partition ID when the interface is implemented. See {platform-javadocs}/{full-version}/com/hazelcast/partition/PartitionAware.html. #875
+
+== Fixes
+
+* Fixed an issue where sending internal Debezium messages was causing failures when connecting to databases.
+https://github.com/hazelcast/hazelcast/pull/26027[#26027]
+* Fixed an issue where the entry listeners for Replicated Maps were checking the Map permissions instead of the Replicated Map permissions.
+https://github.com/hazelcast/hazelcast/pull/25965[#25965]
+* Fixed an issue where queries with indexes were producing duplicate results or failing.
+https://github.com/hazelcast/hazelcast/pull/25527[#25527]
+* Fixed an issue where the map entries' metadata, such as time-to-live and expiration, was not replicated correctly over WAN after updating existing entries.
+https://github.com/hazelcast/hazelcast/pull/25481[#25481]
+* Fixed an issue where the loading of compact-serialized generic records by complex classloaders, such as `JetClassLoader`, was likely to cause deadlocks.
+https://github.com/hazelcast/hazelcast/pull/25379[#25379]
+* Fixed a memory leak that occurred in Hazelcast members and clients while destroying fenced locks.
+https://github.com/hazelcast/hazelcast/pull/25353[#25353]
+* Fixed an issue where sorted index scans were hanging or producing duplicate values when there were multiple entries with the same key.
+https://github.com/hazelcast/hazelcast/pull/25328[#25328]
+* Fixed an issue where setting indexes in a different order, while dynamically adding a map configuration, was failing.
+https://github.com/hazelcast/hazelcast/pull/25234[#25234]
+* Fixed an issue where the diagnostic tool was showing the suggestion to enable it, even if it was already enabled.
+https://github.com/hazelcast/hazelcast/pull/25220[#25220]
+* Fixed an issue where clearing a nonexistent map was resulting in an exception.
+https://github.com/hazelcast/hazelcast/pull/25202[#25202]
+* Fixed an issue where the mechanism to retrieve the partitioning strategy on a client was ignoring the provided Hazelcast cluster properties.
+https://github.com/hazelcast/hazelcast/pull/25162[#25162]
+* Fixed an issue where `ClientConfigXmlGenerator` did not support the `hazelcast-cloud` configuration.
+https://github.com/hazelcast/hazelcast/pull/25155[#25155]
+* Fixed an issue where the cache provider was not able to read the YAML configurations.
+https://github.com/hazelcast/hazelcast/pull/25137[#25137]
+* Fixed an issue where `getDistributedObjects()` was returning inconsistent results when multiple members joined the cluster simultaneously.
+https://github.com/hazelcast/hazelcast/pull/25114[#25114]
+* Fixed an issue where zero-config compact serialization was not working on objects that have a field of type `java.util.UUID`.
+https://github.com/hazelcast/hazelcast/pull/25073[#25073]
+* Fixed an issue where the retry mechanism for the communications between the CP leader and followers was generating too many retries, due to incorrect backoff timeout reset behavior.
+https://github.com/hazelcast/hazelcast/pull/25055[#25055]
+* Fixed an issue where there was a difference between the elapsed clock time and the elapsed total time when listening to migration events.
+https://github.com/hazelcast/hazelcast/pull/25028[#25028]
+* Fixed an issue where the transaction in the Kafka producer was not committed when a batch job finished.
+https://github.com/hazelcast/hazelcast/pull/25024[#25024]
+* Fixed an issue where data events were being fired through WAN replication after a split-brain, even when there were no changes in data.
+https://github.com/hazelcast/hazelcast/pull/24928[#24928]
+* Fixed an issue where lite members were not reporting statistics for map operations.
+https://github.com/hazelcast/hazelcast/pull/24871[#24871]
+* Fixed an issue where blacklisting was ignored after a split-brain scenario.
+https://github.com/hazelcast/hazelcast/pull/24830[#24830]
+* Fixed an issue where the Kinesis sink could lose data when retrying on failures during a terminal snapshot.
+https://github.com/hazelcast/hazelcast/pull/24779[#24779]
+* Fixed an issue where the member list was not updated after a cluster failover scenario.
+https://github.com/hazelcast/hazelcast/pull/24745[#24745]
+* Fixed an issue where the batches produced for write-behind queues did not contain the expected number of entries.
+https://github.com/hazelcast/hazelcast/issues/24763[#24763]
+* Fixed an issue where the fused Jet vertex was ignoring the configured local parallelism and using the default parallelism instead.
+https://github.com/hazelcast/hazelcast/issues/24683[#24683]
+* Fixed an issue where Hazelcast was sending empty map interceptor information to members newly joined to the cluster, which was causing eager map initializations.
+https://github.com/hazelcast/hazelcast/pull/24639[#24639]
+* Fixed an issue where REST calls were failing for Hazelcast clusters with TLS v1.3 configured, and deployed on Kubernetes.
+https://github.com/hazelcast/hazelcast/pull/24616[#24616]
+* Fixed an issue where predicates did not have managed context injection when the predicate was local or not serialized.
+https://github.com/hazelcast/hazelcast/pull/24463[#24463]
+* Fixed an issue where the results of a stream-to-stream join could not be inserted into a remote table connected via JDBC, causing an exception.
+https://github.com/hazelcast/hazelcast/issues/22459[#22459]
+* Fixed an issue where the combining step of `AggregateOperations.maxBy()` was not checking whether the incoming element was null, which can happen if some members did not have any items to process.
+In that case, the comparator was invoked with a null value, which was invalid. #895
+* Fixed a race condition that occurred when canceling Jet jobs during their initialization. #889
+* Fixed an issue where the indexes added during the migration of partitions to newly joined members were not persisted on these new members.
+Relatedly, the ability to persist dynamically added indexes, when the Hot Restart feature is enabled, has been implemented.
#829, #596
+* Fixed an issue where the merge operations after a split-brain (with no changes in the entry values) were emitting WAN events for offloaded operations. #734
+* Fixed an issue where WAN replication from a cluster to other clusters was failing when all clusters shared the same cluster name. #728
+* Fixed a race condition that occurred when the execution of registration/deregistration operations for `JobStatusListener` was offloaded to the event striped executor; the offloaded execution is now awaited. #673
+* Fixed an issue where, when querying JSON, elements that appear after an element containing nested JSON were not appearing in the query results. #570
+* Fixed an issue where data was lost from the ICache data structure with `NATIVE` entries in a split-brain scenario. #480
+* Fixed an issue where the `ANALYZE INSERT INTO` SQL statement did not generate metrics. #444
+* [.enterprise]*Enterprise* Fixed an issue where map entries with time-to-live values were being removed as soon as a cluster with persistence enabled was restarted. #233
+* Fixed an issue where map entries' metadata was replicated incorrectly over WAN after a merge, causing deserialization of values. #225
+* Fixed an issue where the process of retrieving metrics for job executions was entering an infinite loop when a job execution completed on one member but continued on the other members.
+With this fix, only the members on which the jobs have not been completed are queried for metrics; for completed jobs, the metrics are retrieved from the completed jobs context. #194
+* Fixed an issue where the attribute partitioning strategy was not working with Compact and Portable classes. #127
+
+== Removed/Deprecated Features
+
+* The connector for Elasticsearch 6 is removed, as the Elasticsearch 6 module is removed from Hazelcast distributions.
+https://github.com/hazelcast/hazelcast/pull/24734[#24734]
+* The evaluation tool for IMDG 3.x users (Hazelcast 3 Connector) is removed. In the upcoming releases, a new tool for migrating data from 3.x versions will be introduced.
+https://github.com/hazelcast/hazelcast/pull/25051[#25051]
+* Transactions have been deprecated, and will be removed as of Hazelcast version 7.0.
+An improved version of this feature is under consideration. If you are already using transactions, get in touch and share your use case. Your feedback will help us to develop a solution that meets your needs.
+* Portable Serialization has been deprecated. We recommend that you use Compact Serialization, as Portable Serialization will be removed as of version 7.0.
+* The user code deployment API is deprecated, and will be removed in Hazelcast Platform version 6.0.
#223
+
+== Contributors
+
+We would like to thank the contributors from our open source community
+who worked on this release:
+
+* https://github.com/anestoruk[Andrzej Nestoruk]
+* https://github.com/hhromic[Hugo Hromic]
+* https://github.com/aditya-32[Aditya Ranjan Barik]
+* https://github.com/azotcsit[Aleksei Zotov]
+* https://github.com/LarsKrogJensen[LarsKrogJensen]
+* https://github.com/vladykin[Alexey Vladykin]
+* https://github.com/lprimak[Lenny Primak]
+* https://github.com/lfgcampos[Lucas Campos]
+* https://github.com/tommyk-gears[Tommy Karlsson]
+* https://github.com/vinicius-colutti[Vinicius Colutti]
+* https://github.com/lukasblu[Lukas Blunschi]
diff --git a/docs/modules/release-notes/pages/releases.adoc b/docs/modules/release-notes/pages/releases.adoc
new file mode 100644
index 000000000..6b0b79908
--- /dev/null
+++ b/docs/modules/release-notes/pages/releases.adoc
@@ -0,0 +1,4 @@
+= Hazelcast Platform 5.4.x Release Notes
+
+* xref:release-notes:5-4-0.adoc[5.4.0]
+* xref:release-notes:5-3-0.adoc[5.3.0]
\ No newline at end of file
diff --git a/docs/modules/serialization/partials/nav.adoc b/docs/modules/serialization/partials/nav.adoc
index 4ac76a1b9..64a82db35 100644
--- a/docs/modules/serialization/partials/nav.adoc
+++ b/docs/modules/serialization/partials/nav.adoc
@@ -1,6 +1,7 @@
 * Serialization
 ** xref:serialization:serialization.adoc[Overview]
 ** xref:serialization:compact-serialization.adoc[]
+*** xref:compact-binary-specification.adoc[]
 ** xref:serialization:serializing-json.adoc[HazelcastJsonValue]
 ** xref:serialization:custom-serialization.adoc[]
 ** Other Serialization Options
diff --git a/docs/modules/spring/partials/nav.adoc b/docs/modules/spring/partials/nav.adoc
index 9b7dffd0e..8b1378917 100644
--- a/docs/modules/spring/partials/nav.adoc
+++ b/docs/modules/spring/partials/nav.adoc
@@ -1,8 +1 @@
-* Integrating with Spring
-** xref:spring:overview.adoc[Overview]
-** xref:spring:configuration.adoc[]
-** xref:spring:springaware.adoc[]
-** xref:spring:add-caching.adoc[]
-** xref:spring:hibernate.adoc[]
-** xref:spring:transaction-manager.adoc[]
-** xref:spring:best-practices.adoc[]
+
diff --git a/docs/modules/sql/partials/nav.adoc b/docs/modules/sql/partials/nav.adoc
index 914bde25a..139597f9c 100644
--- a/docs/modules/sql/partials/nav.adoc
+++ b/docs/modules/sql/partials/nav.adoc
@@ -1,50 +1,2 @@
-* xref:sql:sql-overview.adoc[Overview]
-* SQL Over Maps
-** xref:sql:get-started-sql.adoc[Tutorial]
-** xref:sql:querying-maps-sql.adoc[Queries]
-** xref:sql:mapping-to-maps.adoc[Mappings]
-* SQL Over Kafka
-** xref:sql:learn-sql.adoc[Tutorial]
-** xref:sql:mapping-to-kafka.adoc[Mappings]
-* SQL Over File Systems
-** xref:sql:get-started-sql-files.adoc[Tutorial]
-** xref:sql:mapping-to-a-file-system.adoc[Mappings]
-* SQL Over JDBC
-** xref:sql:mapping-to-jdbc.adoc[Mappings]
-* SQL Over MongoDB
-** xref:sql:mapping-to-mongo.adoc[Mappings]
-* xref:sql:working-with-json.adoc[Working with JSON]
-* xref:sql:querying-streams.adoc[Stream Processing]
-* xref:sql:parameterized-queries.adoc[Query Parameters]
-* xref:sql:finding-mappings.adoc[Finding Mappings]
-* xref:sql:improving-performance.adoc[Improving Performance]
-* xref:sql:sql-reflection-configuration.adoc[Reflection Configuration]
-* xref:sql:troubleshooting.adoc[Troubleshooting]
-* Statements
-** xref:sql:sql-statements.adoc[Overview]
-** xref:sql:alter-job.adoc[`ALTER JOB`]
-** xref:sql:create-data-connection.adoc[`CREATE DATA CONNECTION`]
-** 
xref:sql:create-index.adoc[`CREATE INDEX`] -** xref:sql:create-job.adoc[`CREATE JOB`] -** xref:sql:create-mapping.adoc[`CREATE MAPPING`] -** xref:sql:create-snapshot.adoc[`CREATE SNAPSHOT`] -** xref:sql:create-view.adoc[`CREATE VIEW`] -** xref:sql:delete.adoc[`DELETE`] -** xref:sql:drop-data-connection.adoc[`DROP DATA CONNECTION`] -** xref:sql:drop-job.adoc[`DROP JOB`] -** xref:sql:drop-mapping.adoc[`DROP MAPPING`] -** xref:sql:drop-snapshot.adoc[`DROP SNAPSHOT`] -** xref:sql:drop-view.adoc[`DROP VIEW`] -** xref:sql:explain.adoc[`EXPLAIN`] -** xref:sql:sink-into.adoc[`INSERT INTO`/`SINK INTO`] -** xref:sql:select.adoc[`SELECT`] -** xref:sql:show-jobs.adoc[`SHOW JOBS`] -** xref:sql:show-resources.adoc[`SHOW RESOURCES`] -** xref:sql:show-mappings.adoc[`SHOW MAPPINGS`] -** xref:sql:update.adoc[`UPDATE`] -* xref:sql:functions-and-operators.adoc[Functions and Expressions] -* xref:sql:data-types.adoc[] -* xref:sql:user-defined-types.adoc[] -* Optimizing SQL queries -** xref:sql:partition-pruning.adoc[Partition Pruning] + diff --git a/docs/modules/tutorials/pages/cpp-client-getting-started.adoc b/docs/modules/tutorials/pages/cpp-client-getting-started.adoc new file mode 100644 index 000000000..62abe0062 --- /dev/null +++ b/docs/modules/tutorials/pages/cpp-client-getting-started.adoc @@ -0,0 +1,412 @@ += Getting Started with the Hazelcast C++ Client +:description: This tutorial will get you started with the Hazelcast C++ client and manipulate a map. + +== What You'll Learn + +{description} + +== Before you Begin + +* C++ 11 or above +* https://hazelcast.com/products/viridian/[Hazelcast Viridian Cloud Account] +* A text editor or IDE + +== Start a Hazelcast Viridian Cloud Cluster + +1. Sign up for a Hazelcast Viridian Cloud account (free trial is available). +2. Log in to your Hazelcast Viridian Cloud account and start your trial by filling in the welcome questionnaire. +3. A Viridian cluster will be created automatically when you start your trial. +4. Press the Connect Cluster dialog and switch over to the Advanced setup tab for connection information needed below. +5. From the Advanced setup tab, download the keystore files and take note of your Cluster ID, Discovery Token and Password as you will need them later. + +== Setup a Hazelcast Client + +Create a new folder and navigate to it: + +[source] +---- +mkdir hazelcast-cpp-example +cd hazelcast-cpp-example +---- + +Download and install Vcpkg: + + +for Windows; +[source,bash] +---- +git clone https://github.com/microsoft/vcpkg +.\vcpkg\bootstrap-vcpkg.bat +---- + +for non-Windows; +[source,bash] +---- +git clone https://github.com/microsoft/vcpkg +./vcpkg/bootstrap-vcpkg.sh +---- + +Download and install hazelcast-cpp-client: + + +for Windows; +[source,bash] +---- +.\vcpkg\vcpkg.exe install "hazelcast-cpp-client[openssl]" --recurse +---- + +for non-Windows; +[source,bash] +---- +./vcpkg/vcpkg install "hazelcast-cpp-client[openssl]" --recurse +---- + +NOTE: Avoid directory names in your path that contain spaces or other non-standard characters. + +Extract the keystore files you downloaded from Viridian into this directory. 
The files you need for this tutorial are:
+
+[source,bash]
+----
+ca.pem
+cert.pem
+key.pem
+----
+
+Create a CMake file named `CMakeLists.txt` in this directory, as follows:
+
+[source,cmake]
+----
+cmake_minimum_required(VERSION 3.10)
+
+project(HazelcastCloud)
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+find_package(hazelcast-cpp-client CONFIG REQUIRED)
+
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/example.cpp)
+  add_executable(example example.cpp)
+  target_link_libraries(example PRIVATE hazelcast-cpp-client::hazelcast-cpp-client)
+endif()
+----
+
+You should have the following entries in the directory:
+[source,bash]
+----
+CMakeLists.txt
+ca.pem
+cert.pem
+key.pem
+vcpkg
+----
+
+== Understanding the C++ Client
+
+The following section creates and starts a Hazelcast client with the default configuration, connects to your Viridian cluster, and then shuts the client down.
+
+Create a C++ file named `example.cpp` and put the following code inside it:
+
+[source,cpp]
+----
+#include <hazelcast/client/hazelcast_client.h>
+#include <iostream>
+
+int
+main(int argc, char** argv)
+{
+    hazelcast::client::client_config config;
+
+    // Viridian Cluster Name and Token
+    config.set_cluster_name("<YOUR_CLUSTER_NAME>");
+    auto& cloud_configuration = config.get_network_config().get_cloud_config();
+    cloud_configuration.enabled = true;
+    cloud_configuration.discovery_token = "<YOUR_DISCOVERY_TOKEN>";
+
+    // configure SSL
+    boost::asio::ssl::context ctx(boost::asio::ssl::context::tlsv12);
+
+    try {
+        ctx.load_verify_file("ca.pem");
+        ctx.use_certificate_file("cert.pem", boost::asio::ssl::context::pem);
+        ctx.set_password_callback(
+          [&](std::size_t max_length,
+              boost::asio::ssl::context::password_purpose purpose) {
+              return "<YOUR_KEY_PASSWORD>";
+          });
+        ctx.use_private_key_file("key.pem", boost::asio::ssl::context::pem);
+    } catch (std::exception& e) {
+        std::cerr << "You should copy ca.pem, cert.pem and key.pem files to "
+                     "the working directory, exception cause "
+                  << e.what() << std::endl;
+        exit(EXIT_FAILURE);
+    }
+    config.get_network_config().get_ssl_config().set_context(std::move(ctx));
+
+    // Connect to your Hazelcast Cluster
+    auto client = hazelcast::new_client(std::move(config)).get();
+
+    // take actions
+    std::cout << "Welcome to your Hazelcast Viridian Cluster!" << std::endl;
+
+    // Shutdown the client connection
+    client.shutdown().get();
+}
+----
+
+Compile using CMake as follows:
+
+[source,bash]
+----
+cmake -B build -S . -DCMAKE_TOOLCHAIN_FILE=./vcpkg/scripts/buildsystems/vcpkg.cmake
+cmake --build build
+----
+
+Once complete, run the example:
+
+[source,bash]
+----
+./build/example
+----
+
+For more information about the Vcpkg installation, see https://github.com/hazelcast/hazelcast-cpp-client/blob/master/Reference_Manual.md#112-vcpkg-users[here].
+In this tutorial, we use CMake for compilation; for other options, see https://github.com/hazelcast/hazelcast-cpp-client/blob/master/Reference_Manual.md#13-compiling-your-project[here].
+
+To understand and use the client, review the https://hazelcast.github.io/hazelcast-cpp-client/api-index.html[C++ API documentation] to see what is possible.
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that allows you to interact with Hazelcast much like any other datastore.
+
+In the following example, we will create a map and insert entries into it, where the keys are ids and the values are objects representing a city.
+
+[source,cpp]
+----
+#include <hazelcast/client/hazelcast_client.h>
+#include <iostream>
+
+void
+create_mapping(hazelcast::client::hazelcast_client client);
+void
+insert_cities(hazelcast::client::hazelcast_client client);
+void
+fetch_cities(hazelcast::client::hazelcast_client client);
+
+struct CityDTO
+{
+    std::string cityName;
+    std::string country;
+    int population;
+};
+
+// CityDTO serializer
+namespace hazelcast {
+namespace client {
+namespace serialization {
+
+template<>
+struct hz_serializer<CityDTO> : compact::compact_serializer
+{
+    static void write(const CityDTO& object, compact::compact_writer& out)
+    {
+        out.write_int32("population", object.population);
+        out.write_string("city", object.cityName);
+        out.write_string("country", object.country);
+    }
+
+    static CityDTO read(compact::compact_reader& in)
+    {
+        CityDTO c;
+
+        c.population = in.read_int32("population");
+        boost::optional<std::string> city = in.read_string("city");
+
+        if (city) {
+            c.cityName = *city;
+        }
+
+        boost::optional<std::string> country = in.read_string("country");
+
+        if (country) {
+            c.country = *country;
+        }
+
+        return c;
+    }
+
+    static std::string type_name() { return "CityDTO"; }
+};
+
+} // namespace serialization
+} // namespace client
+} // namespace hazelcast
+
+int
+main(int argc, char** argv)
+{
+    hazelcast::client::client_config config;
+
+    // Viridian Cluster Name and Token
+    config.set_cluster_name("<YOUR_CLUSTER_NAME>");
+    auto& cloud_configuration = config.get_network_config().get_cloud_config();
+    cloud_configuration.enabled = true;
+    cloud_configuration.discovery_token = "<YOUR_DISCOVERY_TOKEN>";
+
+    // configure SSL
+    boost::asio::ssl::context ctx(boost::asio::ssl::context::tlsv12);
+
+    try {
+        ctx.load_verify_file("ca.pem");
+        ctx.use_certificate_file("cert.pem", boost::asio::ssl::context::pem);
+        ctx.set_password_callback(
+          [&](std::size_t max_length,
+              boost::asio::ssl::context::password_purpose purpose) {
+              return "<YOUR_KEY_PASSWORD>";
+          });
+        ctx.use_private_key_file("key.pem", boost::asio::ssl::context::pem);
+    } catch (std::exception& e) {
+        std::cerr << "You should copy ca.pem, cert.pem and key.pem files to "
+                     "the working directory, exception cause "
+                  << e.what() << std::endl;
+        exit(EXIT_FAILURE);
+    }
+    config.get_network_config().get_ssl_config().set_context(std::move(ctx));
+
+    // Connect to your Hazelcast Cluster
+    auto client = hazelcast::new_client(std::move(config)).get();
+
+    // take actions
+    create_mapping(client);
+    insert_cities(client);
+    fetch_cities(client);
+
+    // Shutdown the client connection
+    client.shutdown().get();
+}
+
+void
+create_mapping(hazelcast::client::hazelcast_client client)
+{
+    // Mapping is required for your distributed map to be queried over SQL.
+    // See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps
+
+    std::cout << "Creating the mapping...";
+
+    auto sql = client.get_sql();
+
+    auto result = sql
+                    .execute(R"(CREATE OR REPLACE MAPPING
+                        cities (
+                            __key INT,
+                            country VARCHAR,
+                            city VARCHAR,
+                            population INT) TYPE IMAP
+                        OPTIONS (
+                            'keyFormat' = 'int',
+                            'valueFormat' = 'compact',
+                            'valueCompactTypeName' = 'CityDTO'))")
+                    .get();
+
+    std::cout << "OK." << std::endl;
+}
+
+void
+insert_cities(hazelcast::client::hazelcast_client client)
+{
+    auto sql = client.get_sql();
+
+    try {
+        sql.execute("DELETE FROM cities").get();
+
+        std::cout << "Inserting data...";
+
+        // Insert the city entries; the DELETE above makes this re-runnable.
+        auto result = sql
+                        .execute(R"(INSERT INTO cities
+                        (__key, city, country, population) VALUES
+                        (1, 'London', 'United Kingdom', 9540576),
+                        (2, 'Manchester', 'United Kingdom', 2770434),
+                        (3, 'New York', 'United States', 19223191),
+                        (4, 'Los Angeles', 'United States', 3985520),
+                        (5, 'Istanbul', 'Türkiye', 15636243),
+                        (6, 'Ankara', 'Türkiye', 5309690),
+                        (7, 'Sao Paulo ', 'Brazil', 22429800))")
+                        .get();
+
+        std::cout << "OK." << std::endl;
+    } catch (hazelcast::client::exception::iexception& e) {
+        // don't panic for duplicated keys.
+        std::cerr << "FAILED, duplicated keys " << e.what() << std::endl;
+    }
+}
+
+void
+fetch_cities(hazelcast::client::hazelcast_client client)
+{
+    std::cout << "Fetching cities...";
+
+    auto result =
+      client.get_sql().execute("SELECT __key, this FROM cities").get();
+
+    std::cout << "OK." << std::endl;
+    std::cout << "--Results of 'SELECT __key, this FROM cities'" << std::endl;
+
+    std::printf("| %-4s | %-20s | %-20s | %-15s |\n",
+                "id",
+                "country",
+                "city",
+                "population");
+
+    for (auto itr = result->iterator(); itr.has_next();) {
+        auto page = itr.next().get();
+
+        for (auto const& row : page->rows()) {
+
+            auto id = row.get_object<int32_t>("__key");
+            auto city = row.get_object<CityDTO>("this");
+            std::printf("| %-4d | %-20s | %-20s | %-15d |\n",
+                        *id,
+                        city->country.c_str(),
+                        city->cityName.c_str(),
+                        city->population);
+        }
+    }
+
+    std::cout
+      << "\n!! Hint !! You can execute your SQL queries on your Viridian "
+         "cluster over the management center. \n 1. Go to 'Management Center' "
+         "of your Hazelcast Viridian cluster. \n 2. Open the 'SQL Browser'. \n "
+         "3. Try to execute 'SELECT * FROM cities'.\n";
+}
+----
+
+The output of this code is given below:
+
+[source,bash]
+----
+Creating the mapping...OK.
+Inserting data...OK.
+Fetching cities...OK.
+--Results of 'SELECT __key, this FROM cities'
+| id | country | city | population |
+| 2 | United Kingdom | Manchester | 2770434 |
+| 6 | Türkiye | Ankara | 5309690 |
+| 1 | United Kingdom | London | 9540576 |
+| 7 | Brazil | Sao Paulo | 22429800 |
+| 4 | United States | Los Angeles | 3985520 |
+| 5 | Türkiye | Istanbul | 15636243 |
+| 3 | United States | New York | 19223191 |
+----
+
+NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to insertion order.
+
+== Summary
+
+In this tutorial, you learned how to get started with the Hazelcast C++ Client, connect to a Viridian instance, and put data into a distributed map.
+
+== See Also
+
+There are a lot of things that you can do with the C++ Client. For more, such as how you can query a map with predicates and SQL,
+check out our https://github.com/hazelcast/hazelcast-cpp-client[C++ Client repository] and our https://hazelcast.github.io/hazelcast-cpp-client/api-index.html[C++ API documentation] to better understand what is possible. A small parameterized-query sketch follows below.
+
+If you have any questions, suggestions, or feedback, please do not hesitate to reach out to us via https://slack.hazelcast.com/[Hazelcast Community Slack].
+Also, please take a look at https://github.com/hazelcast/hazelcast-cpp-client/issues[the issue list] if you would like to contribute to the client.
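+
+As a taste of such queries, here is a minimal sketch of a parameterized `SELECT`, reusing the client, `CityDTO`, and serializer from the example above. It assumes the variadic parameter overload of `execute()`; treat it as a starting point rather than a complete program:
+
+[source,cpp]
+----
+// Sketch only: assumes the client, CityDTO and hz_serializer<CityDTO> above.
+// The literal 5000000 is bound to the '?' placeholder as a query parameter.
+auto result = client.get_sql()
+                .execute("SELECT __key, this FROM cities WHERE population > ?",
+                         static_cast<int32_t>(5000000))
+                .get();
+
+for (auto itr = result->iterator(); itr.has_next();) {
+    auto page = itr.next().get();
+    for (auto const& row : page->rows()) {
+        auto id = row.get_object<int32_t>("__key");
+        auto city = row.get_object<CityDTO>("this");
+        std::cout << *id << " -> " << city->cityName << std::endl;
+    }
+}
+----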
diff --git a/docs/modules/tutorials/pages/cpsubsystem.adoc b/docs/modules/tutorials/pages/cpsubsystem.adoc new file mode 100644 index 000000000..a2ef13e93 --- /dev/null +++ b/docs/modules/tutorials/pages/cpsubsystem.adoc @@ -0,0 +1,238 @@ += CP Subsystem and CPMap Tutorial +:description: In this tutorial, you will examine the operations of a CP-enabled Hazelcast Platform cluster, then work with the CPMap data structure using the Hazelcast CLC. + +{description} + +// Give some context about the use case for this tutorial. What will the reader learn? +== Context + +The Hazelcast CP Subsystem adds the capability of true consistency to the primarily AP Hazelcast Platform. When the CP Subsystem is enabled, application developers have access to CPMap, a key-value store similar in function to IMap that provides consistency guarantees. The CP Subsystem also supports Java concurrency primitives, guaranteeing atomic operations of these data structures. + +// Optional: What does the reader need before starting this tutorial? Think about tools or knowledge. Delete this section if your readers can dive straight into the lesson without requiring any prerequisite knowledge. +== Before you Begin + +Before starting this tutorial, make sure that you have installed the following: + +* https://www.docker.com/[Docker] +* https://docs.hazelcast.com/clc/latest/overview[Hazelcast Command Line Client (CLC)] +* https://www.oracle.com/java/technologies/downloads/[JDK 17 or later] +* https://www.docker.com/products/docker-desktop/[Docker Desktop] + +Optional + +* https://maven.apache.org/[Maven] + +* Your preferred Java IDE + +== Step 1. Set up the Cluster and Verify Operations + +In this section, you'll launch a Hazelcast cluster and Management Center using Docker. You'll verify that the CP Subsystem is up and ready for data. You'll then pause, then restart a cluster member and observe the changes in Management Center and view the system logs. + +. Download the repository from https://github.com/hazelcast-guides/cpsubsystem[GitHub]. + +. Run the following commands to start a Hazelcast cluster ++ +```cli +docker compose up -d +docker compose ps +``` +You should see four services up and running: three instances of Hazelcast, and one instance of Management Center. + +. Open the file `hazelcast.yaml` to review the cluster member configuration. + +. Open a web browser and connect to *localhost:8080*. This will open Management Center. Select Dev Mode to open the main dashboard. Go to *Settings*, then open *License*. Paste in the license key from `hazelcast.yaml`. Verify that the license includes Advanced CP, which is required for CPMap functionality. ++ +image::licensed_features.png[] + +. Close the settings screen. The main dashboard should show that the CP Subsystem is accessible. ++ +image::mc_main_dashboard.png[] + +. Scroll down to verify that the cluster has a total of three nodes. + +. From the left side menu, go to *CP Subsystem > Dashboard*. Note that the CP Subsystem has three nodes in the CP Stats window. Under CP Groups, the only group that exists is METADATA; this group is used for CP Subsystem management. + + +== Step 2. Create a CPMap Using CLC + +In this step, you'll use the CLC to connect to one of the cluster members. You'll create a CPMap and add data to the map. You'll then retrieve data from the map. + +. Open a separate terminal window. Enable CPMap capabilities in CLC (only needed for CLC 5.3.7). ++ +```cli +export CLC_EXPERIMENTAL_CPMAP=1 +``` + +. Start CLC using the provided configuration file. 
++
+```cli
+clc -c clc_docker.yaml
+```
+. At the SQL prompt, use the `cpmap` commands to create a CPMap and add data, then retrieve the data.
++
+```cli
+\cpmap -n trektos set 1 "James Kirk"
+\cpmap -n trektos get 1
+```
++
+[NOTE]
+The backslash is needed for all CLC commands. Otherwise, CLC assumes you are entering SQL commands. Type `\help` for a list of all available commands.
+
+. Run the `trektos.clc` script to add data to the CPMap.
++
+```cli
+\script run trektos.clc
+```
+
+. Verify that additional entries have been added to the map.
++
+```cli
+\cpmap -n trektos get 6
+```
+
+. In Management Center, examine the CP Subsystem dashboard. Note that there is now a "default" group. This is the group maintaining the RAFT algorithm for the `trektos` CPMap.
+
+. In Management Center, select *CPMaps* from the left side menu. The screen lists the CPMap you just created, along with map statistics.
+
+. (Optional) In the Client subdirectory, we've included a sample Java client that creates an additional CPMap. To run the client, change to the Client subdirectory, then run the following Maven command.
++
+```cli
+mvn clean compile exec:java -Dexec.mainClass="Client4CP"
+```
++
+Use CLC and Management Center to verify the creation of a second CPMap.
+
+== Step 3. Simulating a Network Partition
+
+In this section, you will pause and restart individual cluster members to simulate network partition and member failure conditions. You'll learn about when the CP Subsystem can heal itself, and when administrator intervention is required.
+
+. From the CLC, run the `sw.clc` script. This creates an IMap. We will use this to compare data accessibility between an AP IMap and a CP CPMap.
++
+```cli
+\script run sw.clc
+```
+
+. Retrieve an entry from the IMap.
++
+```cli
+\map -n starwars get 1
+```
+
+. In the terminal window, stop cluster member 2. Observe the changes in the CP Subsystem dashboard.
++
+```cli
+docker stop cpsubsystem-hz2-1
+```
++
+image::dashboard_paused_member.png[]
+
+. Review the log entries of one of the remaining members to see messages related to data migration for AP data and group rebalancing for CP data. The command is `docker logs <container-name>`.
++
+Example output:
++
+image::member_depart_log.png[]
+
+. Using CLC, verify that you can still access both IMap and CPMap data.
++
+```cli
+\map -n starwars get 1
+\cpmap -n trektos get 1
+```
+
+. Stop cluster member 3. Note the changes in Management Center.
++
+```cli
+docker stop cpsubsystem-hz3-1
+```
+
+. Verify that you can still access AP data, but cannot access CP data. Accessing CP data will give you a "Connecting to cluster..." message.
+
+. Start cluster member 2.
++
+```cli
+docker start cpsubsystem-hz2-1
+```
+
+[NOTE]
+At this point, we have created a situation where the CP Subsystem is unrecoverable. There is only a minority left in the group, and there is no majority "out there" that retains any CP data. The only path to recovery is to restart all the cluster members. The lesson here is to configure resilience into your CP Subsystem so this cannot happen!
+
+. Use Docker Compose to remove all your containers.
++
+```cli
+docker compose down
+```
+
+== Step 4. Configuring Resilience (Optional)
+
+In this step, we will add another cluster member, and configure the cluster to handle member failure automatically.
+
+. Open the file `compose.yaml`. Add a fourth instance of Hazelcast to the file. Save your changes.
++
+```yaml
+  hz4:
+    image: hazelcast/hazelcast-enterprise:5.4.0
+    environment:
+      JAVA_OPTS: -Dhazelcast.config=/project/hazelcast.yaml
+    volumes:
+      - ".:/project"
+```
+. Open the file `hazelcast.yaml`. Modify the configuration as follows:
+.. Set the member count to 4
+.. Add the timeout and set it to 60 seconds
+.. Add the auto-removal and set it to 60 seconds
++
+```yaml
+  cp-subsystem:
+    cp-member-count: 4
+    group-size: 3
+    session-time-to-live-seconds: 60
+    missing-cp-member-auto-removal-seconds: 60
+```
+
+. Start up the containers.
++
+```cli
+docker compose up -d
+```
+. In Management Center, go to the CP Subsystem dashboard. You should now see four members listed. Note that one has zero nodes. This means it is not participating in any groups.
++
+image::4members.png[]
+
+. Check the IP address of hz4. If it is one of the active CP members, stop it. If it is not, then stop hz3. Observe the changes in Management Center over the next few minutes.
++
+```cli
+docker container inspect cpsubsystem-hz4-1 | grep "IPAddress"
+docker stop cpsubsystem-hz4-1
+```
+
+. Note that the node that previously was not participating in any groups has been promoted and is now participating in the group.
++
+image::promoted.png[]
+
+. Restart the stopped container. There will be no changes in Management Center, nor to the CP Subsystem. If you look at the log for the restarted member, you will see the following message:
+
+`2024-04-16 16:25:43 2024-04-16 23:25:43,655 [ INFO] [hz.recursing_benz.generic-operation.thread-3] [c.h.c.i.MetadataRaftGroupManager]: [172.19.0.3]:5701 [dev] [5.4.0] Disabling discovery of initial CP members since it is already completed...`
+
+. To bring the CP Subsystem back to all four members, use the Management Center dashboard to promote the restarted member.
+
+[NOTE]
+Because we only have one group, no group rebalancing will occur. You can verify this by looking at the system logs for each member.
+
+== What's Next?
+
+You now have a working CP Subsystem setup. Experiment with creating multiple groups to observe group distribution when there are more subsystem members than needed by each group. Work with other CP data structures. The documentation links below provide additional information on these topics.
+
+== Summary
+
+In this tutorial, you worked with the CPMap data structure and experimented with CP Subsystem management.
+
+== See Also
+
+* Hazelcast Training: https://training.hazelcast.com/cp-subsystem[Strong Data Consistency] - this course provides instruction on CP Subsystem operations, configuration, data structures, and fenced locks.
+* https://docs.hazelcast.com/hazelcast/latest/cp-subsystem/cp-subsystem[CP Subsystem Overview]
+* https://docs.hazelcast.com/hazelcast/latest/data-structures/cpmap[CPMap Documentation]
+* https://docs.hazelcast.com/hazelcast/latest/cp-subsystem/cp-subsystem#persistence[CP Subsystem Persistence]
diff --git a/docs/modules/tutorials/pages/csharp-client-getting-started.adoc b/docs/modules/tutorials/pages/csharp-client-getting-started.adoc
new file mode 100644
index 000000000..199a37f40
--- /dev/null
+++ b/docs/modules/tutorials/pages/csharp-client-getting-started.adoc
@@ -0,0 +1,389 @@
+= Getting Started with the Hazelcast .NET Client
+:description: In this tutorial you will see how to connect with the Hazelcast .NET client and manipulate a map.
+
+== What You'll Learn
+
+{description}
+
+== Before you Begin
+
+* .NET SDK 6.0 or above
+* https://hazelcast.com/products/viridian/[Hazelcast Viridian Cloud Account]
+* An IDE
+
+== Start a Hazelcast Viridian Cloud Cluster
+
+1. Sign up for a Hazelcast Viridian Cloud account (free trial is available).
+2. Log in to your Hazelcast Viridian Cloud account and start your trial by filling in the welcome questionnaire.
+3. A Viridian cluster will be created automatically when you start your trial.
+4. Press the Connect Cluster dialog and switch over to the Advanced setup tab for connection information needed below.
+5. From the Advanced setup tab, download the keystore files and take note of your Cluster ID, Discovery Token and Password as you will need them later.
+
+== Setup a Hazelcast Client
+
+Create a new folder and navigate to it:
+
+[source]
+----
+mkdir hazelcast-csharp-example
+cd hazelcast-csharp-example
+----
+
+Create a new .NET Console project using the command-line tool:
+
+[source]
+----
+dotnet new console
+----
+
+Add the Hazelcast.NET Client as a dependency:
+
+[source]
+----
+dotnet add package Hazelcast.Net
+----
+
+Extract the keystore files you downloaded from Viridian into this directory. The files you need for this tutorial are:
+
+[source,bash]
+----
+client.pfx
+----
+
+To understand and use the client, review the https://hazelcast.github.io/hazelcast-csharp-client/versions.html[.NET API documentation] to see what is possible.
+
+== Understanding the .NET Client
+
+The following section creates and starts a Hazelcast client with the default configuration, connects to your Viridian cluster, and then shuts the client down.
+
+Create a C# file named `Program.cs` and put the following code inside it:
+
+[source,cs]
+----
+using Hazelcast;
+
+using System;
+
+namespace Client
+{
+    internal static class Program
+    {
+        public static async Task Main(string[] args)
+        {
+            // Create a client connection
+            var options = new HazelcastOptionsBuilder()
+                .With(config =>
+                {
+                    // Your Viridian cluster name.
+                    config.ClusterName = "<YOUR_CLUSTER_NAME>";
+
+                    // Your discovery token to connect to your Viridian cluster.
+                    config.Networking.Cloud.DiscoveryToken = "<YOUR_DISCOVERY_TOKEN>";
+
+                    // Configure SSL.
+                    config.Networking.Ssl.Enabled = true;
+                    config.Networking.Ssl.ValidateCertificateChain = false;
+                    config.Networking.Ssl.CertificatePath = "client.pfx";
+                    config.Networking.Ssl.CertificatePassword = "<YOUR_CERTIFICATE_PASSWORD>";
+                })
+                .With(args) // Pass command line args to the client
+                .Build();
+
+            await using var client = await HazelcastClientFactory.StartNewClientAsync(options);
+
+            // take actions
+        }
+    }
+}
+----
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that allows you to interact with Hazelcast much like any other datastore.
+
+In the following example, we will create a map and insert entries into it, where the keys are ids and the values are objects representing a city.
+
+[source,cs]
+----
+using Hazelcast;
+using Hazelcast.Serialization.Compact;
+
+namespace Client
+{
+    internal class CityDTO
+    {
+        public string City { get; set; }
+        public string Country { get; set; }
+        public int Population { get; set; }
+    }
+
+    internal class CitySerializer : ICompactSerializer<CityDTO>
+    {
+        public string TypeName => "CityDTO";
+
+        public CityDTO Read(ICompactReader reader)
+        {
+            return new CityDTO()
+            {
+                City = reader.ReadString("city"),
+                Country = reader.ReadString("country"),
+                Population = reader.ReadInt32("population")
+            };
+        }
+
+        public void Write(ICompactWriter writer, CityDTO value)
+        {
+            writer.WriteString("city", value.City);
+            writer.WriteString("country", value.Country);
+            writer.WriteInt32("population", value.Population);
+        }
+    }
+
+    internal static class Program
+    {
+        public static async Task Main(string[] args)
+        {
+            // Create a client connection
+            var options = new HazelcastOptionsBuilder()
+                .With(config =>
+                {
+                    // Your Viridian cluster name.
+                    config.ClusterName = "<YOUR_CLUSTER_NAME>";
+
+                    // Your discovery token to connect to your Viridian cluster.
+                    config.Networking.Cloud.DiscoveryToken = "<YOUR_DISCOVERY_TOKEN>";
+
+                    // Configure SSL.
+                    config.Networking.Ssl.Enabled = true;
+                    config.Networking.Ssl.ValidateCertificateChain = false;
+                    config.Networking.Ssl.CertificatePath = "client.pfx";
+                    config.Networking.Ssl.CertificatePassword = "<YOUR_CERTIFICATE_PASSWORD>";
+
+                    // Register Compact serializer of City class.
+                    config.Serialization.Compact.AddSerializer(new CitySerializer());
+                })
+                .With(args) // Pass command line args to the client
+                .Build();
+
+            // Connect to your Hazelcast Cluster
+            await using var client = await HazelcastClientFactory.StartNewClientAsync(options);
+
+            // Create a map on the cluster
+            await CreateMapping(client);
+
+            // Add some data
+            await PopulateCities(client);
+
+            // Output the data
+            await FetchCities(client);
+        }
+
+        private static async Task CreateMapping(IHazelcastClient client)
+        {
+            // Mapping is required for your distributed map to be queried over SQL.
+            // See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps
+
+            Console.Write("\nCreating the mapping...");
+
+            var mappingCommand = @"CREATE OR REPLACE MAPPING
+                                    cities (
+                                        __key INT,
+                                        country VARCHAR,
+                                        city VARCHAR,
+                                        population INT) TYPE IMAP
+                                    OPTIONS (
+                                        'keyFormat' = 'int',
+                                        'valueFormat' = 'compact',
+                                        'valueCompactTypeName' = 'CityDTO')";
+
+            await client.Sql.ExecuteCommandAsync(mappingCommand);
+
+            Console.Write("OK.");
+        }
+
+        private static async Task PopulateCities(IHazelcastClient client)
+        {
+            var deleteQuery = @"DELETE FROM cities";
+
+            var insertQuery = @"INSERT INTO cities
+                                (__key, city, country, population) VALUES
+                                (1, 'London', 'United Kingdom', 9540576),
+                                (2, 'Manchester', 'United Kingdom', 2770434),
+                                (3, 'New York', 'United States', 19223191),
+                                (4, 'Los Angeles', 'United States', 3985520),
+                                (5, 'Istanbul', 'Türkiye', 15636243),
+                                (6, 'Ankara', 'Türkiye', 5309690),
+                                (7, 'Sao Paulo ', 'Brazil', 22429800)";
+
+            try
+            {
+                Console.Write("\nInserting data...");
+                await client.Sql.ExecuteCommandAsync(deleteQuery);
+                await client.Sql.ExecuteCommandAsync(insertQuery);
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine("FAILED. 
"+ex.ToString()); + } + + Console.Write("OK."); + } + + private static async Task FetchCities(IHazelcastClient client) + { + Console.Write("\nFetching cities..."); + + await using var result = await client.Sql.ExecuteQueryAsync("SELECT __key, this FROM cities"); + Console.Write("OK."); + Console.WriteLine("\n--Results of 'SELECT __key, this FROM cities'"); + Console.WriteLine(String.Format("| {0,4} | {1,20} | {2,20} | {3,15} |","id", "country", "city", "population")); + + await foreach (var row in result) + { + var id = row.GetKey(); // Corresponds to '__key' + var c = row.GetValue(); // Corresponds to 'this' + + Console.WriteLine(string.Format("| {0,4} | {1,20} | {2,20} | {3,15} |", + id, + c.Country, + c.City, + c.Population)); + } + } + } +} +---- + +The output of this code is given below: + +[source,bash] +---- +Creating the mapping...OK. +Inserting data...OK. +Fetching cities...OK. +--Results of 'SELECT __key, this FROM cities' +| id | country | city | population | +| 2 | United Kingdom | Manchester | 2770434 | +| 6 | Türkiye | Ankara | 5309690 | +| 1 | United Kingdom | London | 9540576 | +| 7 | Brazil | Sao Paulo | 22429800 | +| 4 | United States | Los Angeles | 3985520 | +| 5 | Türkiye | Istanbul | 15636243 | +| 3 | United States | New York | 19223191 | +---- + +NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to insertion order. + +== Understanding the Hazelcast Map API + +A Hazelcast Map is a distributed key-value store, similar to C# dictionary. You can store key-value pairs in a Hazelcast Map. + +In the following example, we will work with map entries where the keys are ids and the values are defined as an object representing a city. + +[source,cs] +---- +using Hazelcast; + +namespace Client +{ + internal static class Program + { + public static async Task Main(string[] args) + { + // Create a client connection + var options = new HazelcastOptionsBuilder() + .With(config => + { + // Your Viridian cluster name. + config.ClusterName = ""; + + // Your discovery token to connect Viridian cluster. + config.Networking.Cloud.DiscoveryToken = ""; + + // Configure SSL. + config.Networking.Ssl.Enabled = true; + config.Networking.Ssl.ValidateCertificateChain = false; + config.Networking.Ssl.CertificatePath = "client.pfx"; + config.Networking.Ssl.CertificatePassword = ""; + }) + .With(args) // Pass command line args to the client + .Build(); + + await using var client = await HazelcastClientFactory.StartNewClientAsync(options); + + // Create a map on the cluster + await using var citiesMap = await client.GetMapAsync("cities"); + + // Add some data + await citiesMap.PutAsync(1, "London"); + await citiesMap.PutAsync(2, "New York"); + await citiesMap.PutAsync(3, "Tokyo"); + + // Output the data + var entries = citiesMap.GetEntriesAsync(); + + foreach (var entry in entries.Result) + { + Console.WriteLine($"{entry.Key} -> {entry.Value}"); + } + } +} +---- + +Following line returns a map proxy object for the `cities` map: + +[source,cs] +---- + // Create a map on the cluster + await using var citiesMap = await client.GetMapAsync("cities"); +---- + +If `cities` doesn't exist, it will be automatically created. All the clients connected to the same cluster will have access to the same map. + +With these lines, client adds data to the `cities` map. The first parameter is the key of the entry, the second one is the value. 
+ +[source,cs] +---- + // Add some data + await citiesMap.PutAsync(1, "London"); + await citiesMap.PutAsync(2, "New York"); + await citiesMap.PutAsync(3, "Tokyo"); +---- + +Then, we get the data using the `GetEntriesAsync()` method and iterate over the results. + +[source,cs] +---- + // Output the data + var entries = citiesMap.GetEntriesAsync(); + + foreach (var entry in entries.Result) + { + Console.WriteLine($"{entry.Key} -> {entry.Value}"); + } +---- + +The output of this code is given below: + +[source,bash] +---- +2 -> New York +1 -> London +3 -> Tokyo +---- + +NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to entry order. + + +== Summary + +In this tutorial, you learned how to get started with the Hazelcast .NET Client and put data into a distributed map. + +== See Also + +There are a lot of things that you can do with the .NET Client. For more, such as how you can query a map with predicates and SQL, +check out our https://github.com/hazelcast/hazelcast-csharp-client[.NET Client repository] and our https://hazelcast.github.io/hazelcast-csharp-client/versions.html[.NET API documentation] to better understand what is possible. + +If you have any questions, suggestions, or feedback please do not hesitate to reach out to us via https://slack.hazelcast.com/[Hazelcast Community Slack]. +Also, please take a look at https://github.com/hazelcast/hazelcast-csharp-client/issues[the issue list] if you would like to contribute to the client. + + diff --git a/docs/modules/tutorials/pages/go-client-getting-started.adoc b/docs/modules/tutorials/pages/go-client-getting-started.adoc new file mode 100644 index 000000000..43838499a --- /dev/null +++ b/docs/modules/tutorials/pages/go-client-getting-started.adoc @@ -0,0 +1,317 @@ += Getting Started with the Hazelcast Go Client +:description: This tutorial will get you started with the Hazelcast Go client and manipulate a map. + +== What You'll Learn + +{description} + +== Before you Begin + +* Go 1.15 or above +* https://hazelcast.com/products/viridian/[Hazelcast Viridian Cloud Account] +* A text editor or IDE + +== Start a Hazelcast Viridian Cloud Cluster + +1. Sign up for a Hazelcast Viridian Cloud account (free trial is available). +2. Log in to your Hazelcast Viridian Cloud account and start your trial by filling in the welcome questionnaire. +3. A Viridian cluster will be created automatically when you start your trial. +4. Press the Connect Cluster dialog and switch over to the Advanced setup tab for connection information needed below. +5. From the Advanced setup tab, download the keystore files and take note of your Cluster ID, Discovery Token and Password as you will need them later. + +== Setup a Hazelcast Client + +Create a new folder and navigate to it: + +[source] +---- +mkdir hazelcast-go-example +cd hazelcast-go-example +---- + +Initialize a new go module: + +[source,bash] +---- +go mod init example +---- + +Install Hazelcast Go client's latest version as a dependency: + +[source,bash] +---- +go get github.com/hazelcast/hazelcast-go-client@latest +---- + +Extract the keystore files you downloaded from Viridian into this directory. The files you need for this tutorial are: + +[source,bash] +---- +ca.pem +cert.pem +key.pem +---- + +== Understanding the Go Client + +The following section creates and starts a Hazelcast client with default configuration, connects to your Viridian cluster before shutting the client down at the end. 
+
+Create a Go file named “example.go” and put the following code inside it:
+
+[source,go]
+----
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/hazelcast/hazelcast-go-client"
+)
+
+func main() {
+
+	// Connection details for cluster
+	config := hazelcast.Config{}
+	config.Cluster.Name = ""
+
+	config.Cluster.Cloud.Enabled = true
+	config.Cluster.Cloud.Token = ""
+
+	config.Cluster.Network.SSL.SetCAPath("ca.pem")
+	config.Cluster.Network.SSL.AddClientCertAndEncryptedKeyPath("cert.pem", "key.pem", "")
+
+	// create the client and connect to the cluster
+	client, err := hazelcast.StartNewClientWithConfig(context.TODO(), config)
+	if err != nil {
+		panic(err)
+	}
+	defer client.Shutdown(context.TODO())
+
+	fmt.Println("Welcome to your Hazelcast Viridian Cluster!")
+}
+----
+
+To run this Go script, use the following command:
+
+[source,bash]
+----
+go run example.go
+----
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that lets you interact with Hazelcast much like any other datastore.
+
+In the following example, we will create a map and insert entries into it, where the keys are ids and the values are objects representing a city.
+
+[source,go]
+----
+package main
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+
+	"github.com/hazelcast/hazelcast-go-client"
+	"github.com/hazelcast/hazelcast-go-client/logger"
+	"github.com/hazelcast/hazelcast-go-client/serialization"
+)
+
+type CityDTO struct {
+	city       string
+	country    string
+	population int32
+}
+
+type CitySerializer struct{}
+
+func (s CitySerializer) Type() reflect.Type {
+	return reflect.TypeOf(CityDTO{})
+}
+
+func (s CitySerializer) TypeName() string {
+	return "CityDTO"
+}
+
+func (s CitySerializer) Write(writer serialization.CompactWriter, value interface{}) {
+	city := value.(CityDTO)
+
+	// Field names must match the names used in Read and in the SQL mapping below.
+	writer.WriteString("city", &city.city)
+	writer.WriteString("country", &city.country)
+	writer.WriteInt32("population", city.population)
+}
+
+func (s CitySerializer) Read(reader serialization.CompactReader) interface{} {
+	return CityDTO{
+		city:       *reader.ReadString("city"),
+		country:    *reader.ReadString("country"),
+		population: reader.ReadInt32("population"),
+	}
+}
+
+func createMapping(ctx context.Context, client hazelcast.Client) error {
+	fmt.Println("Creating the mapping...")
+
+	// Mapping is required for your distributed map to be queried over SQL.
+	// See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps
+	mappingQuery := `
+        CREATE OR REPLACE MAPPING
+        cities (
+            __key INT,
+            country VARCHAR,
+            city VARCHAR,
+            population INT) TYPE IMAP
+        OPTIONS (
+            'keyFormat' = 'int',
+            'valueFormat' = 'compact',
+            'valueCompactTypeName' = 'CityDTO')
+	`
+
+	_, err := client.SQL().Execute(ctx, mappingQuery)
+	if err != nil {
+		return err
+	}
+
+	fmt.Println("OK.\n")
+	return nil
+}
+
+func populateCities(ctx context.Context, client hazelcast.Client) error {
+	fmt.Println("Inserting data...")
+
+	// The mapping created above lets this data be queried over SQL.
+	// See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps
+	insertQuery := `
+        INSERT INTO cities
+        (__key, city, country, population) VALUES
+        (1, 'London', 'United Kingdom', 9540576),
+        (2, 'Manchester', 'United Kingdom', 2770434),
+        (3, 'New York', 'United States', 19223191),
+        (4, 'Los Angeles', 'United States', 3985520),
+        (5, 'Istanbul', 'Türkiye', 15636243),
+        (6, 'Ankara', 'Türkiye', 5309690),
+        (7, 'Sao Paulo ', 'Brazil', 22429800)
+	`
+
+	_, err := client.SQL().Execute(ctx, "DELETE from cities")
+	if err != nil {
+		return err
+	}
+	_, err = client.SQL().Execute(ctx, insertQuery)
+	if err != nil {
+		return err
+	}
+
+	fmt.Println("OK.\n")
+	return nil
+}
+
+func fetchCities(ctx context.Context, client hazelcast.Client) error {
+	fmt.Println("Fetching cities...")
+
+	result, err := client.SQL().Execute(ctx, "SELECT __key, this FROM cities")
+	if err != nil {
+		return err
+	}
+	defer result.Close()
+
+	fmt.Println("OK.")
+	fmt.Println("--Results of SELECT __key, this FROM cities")
+	fmt.Printf("| %4s | %20s | %20s | %15s |\n", "id", "country", "city", "population")
+
+	iter, err := result.Iterator()
+	if err != nil {
+		return err
+	}
+	for iter.HasNext() {
+		row, err := iter.Next()
+		if err != nil {
+			return err
+		}
+
+		key, err := row.Get(0)
+		if err != nil {
+			return err
+		}
+		cityDTO, err := row.Get(1)
+		if err != nil {
+			return err
+		}
+
+		fmt.Printf("| %4d | %20s | %20s | %15d |\n", key.(int32), cityDTO.(CityDTO).country, cityDTO.(CityDTO).city, cityDTO.(CityDTO).population)
+	}
+
+	fmt.Println("\n!! Hint !! You can execute your SQL queries on your Viridian cluster over the management center. \n 1. Go to 'Management Center' of your Hazelcast Viridian cluster. \n 2. Open the 'SQL Browser'. \n 3. Try to execute 'SELECT * FROM cities'.")
+	return nil
+}
+
+///////////////////////////////////////////////////////
+
+func main() {
+
+	// Connection details for cluster
+	config := hazelcast.Config{}
+	config.Cluster.Name = ""
+
+	config.Cluster.Cloud.Enabled = true
+	config.Cluster.Cloud.Token = ""
+
+	config.Cluster.Network.SSL.SetCAPath("ca.pem")
+	config.Cluster.Network.SSL.AddClientCertAndEncryptedKeyPath("cert.pem", "key.pem", "")
+
+	// Register Compact Serializers
+	config.Serialization.Compact.SetSerializers(CitySerializer{})
+
+	// Other environment properties
+	config.Logger.Level = logger.FatalLevel
+
+	ctx := context.TODO()
+	// create the client and connect to the cluster
+	client, err := hazelcast.StartNewClientWithConfig(ctx, config)
+	if err != nil {
+		panic(err)
+	}
+
+	if err := createMapping(ctx, *client); err != nil {
+		panic(fmt.Errorf("creating mapping: %w", err))
+	}
+	if err := populateCities(ctx, *client); err != nil {
+		panic(fmt.Errorf("populating cities: %w", err))
+	}
+	if err := fetchCities(ctx, *client); err != nil {
+		panic(fmt.Errorf("fetching cities: %w", err))
+	}
+
+	if err := client.Shutdown(ctx); err != nil {
+		panic(err)
+	}
+}
+----
+
+The output of this code is given below:
+
+[source,bash]
+----
+Creating the mapping...OK.
+Inserting data...OK.
+Fetching cities...OK.
+--Results of SELECT __key, this FROM cities
+| id | country | city | population |
+| 2 | United Kingdom | Manchester | 2770434 |
+| 6 | Türkiye | Ankara | 5309690 |
+| 1 | United Kingdom | London | 9540576 |
+| 7 | Brazil | Sao Paulo | 22429800 |
+| 4 | United States | Los Angeles | 3985520 |
+| 5 | Türkiye | Istanbul | 15636243 |
+| 3 | United States | New York | 19223191 |
+----
+
+== Summary
+
+In this tutorial, you learned how to get started with the Hazelcast Go Client, connect to a Viridian instance and put data into a distributed map.
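+
+This tutorial used SQL to read and write the data. You can also work with a map directly through the Map API. The following is a minimal sketch, assuming the same Viridian connection configuration as in the examples above (fill in the cluster name, token, and TLS paths as before); the map name `cityNames` is just an illustration, kept separate from the SQL-mapped `cities` map:
+
+[source,go]
+----
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/hazelcast/hazelcast-go-client"
+)
+
+func main() {
+	ctx := context.TODO()
+
+	// Configure the client as in the examples above (cluster name,
+	// discovery token, and TLS files), then start it.
+	config := hazelcast.Config{}
+	client, err := hazelcast.StartNewClientWithConfig(ctx, config)
+	if err != nil {
+		panic(err)
+	}
+	defer client.Shutdown(ctx)
+
+	// Get (or implicitly create) the distributed map.
+	cityNames, err := client.GetMap(ctx, "cityNames")
+	if err != nil {
+		panic(err)
+	}
+
+	// Write an entry and read it back.
+	if err := cityNames.Set(ctx, 1, "London"); err != nil {
+		panic(err)
+	}
+	value, err := cityNames.Get(ctx, 1)
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println("1 ->", value)
+}
+----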
+ +== See Also + +There are a lot of things that you can do with the Go Client. For more, such as how you can query a map with predicates and SQL, +check out our https://github.com/hazelcast/hazelcast-go-client[Go Client repository] and our https://pkg.go.dev/github.com/hazelcast/hazelcast-go-client[Go API documentation] to better understand what is possible. + +If you have any questions, suggestions, or feedback please do not hesitate to reach out to us via https://slack.hazelcast.com/[Hazelcast Community Slack]. +Also, please take a look at https://github.com/hazelcast/hazelcast-go-client/issues[the issue list] if you would like to contribute to the client. diff --git a/docs/modules/tutorials/pages/hazelcast-platform-operator-expose-externally.adoc b/docs/modules/tutorials/pages/hazelcast-platform-operator-expose-externally.adoc new file mode 100644 index 000000000..07c9c707a --- /dev/null +++ b/docs/modules/tutorials/pages/hazelcast-platform-operator-expose-externally.adoc @@ -0,0 +1,574 @@ += Connect to Hazelcast from Outside Kubernetes +:description: In this tutorial, you'll connect to a Hazelcast cluster running in Kubernetes from outside of the Kubernetes environment. + +== What You’ll Learn + +{description} + +== Before you Begin + +* Up and running https://kubernetes.io/[Kubernetes] cluster +* Kubernetes command-line tool, https://kubernetes.io/docs/tasks/tools/#kubectl[kubectl] +* Deployed xref:operator:ROOT:get-started.adoc[Hazelcast Platform Operator] + +WARNING: This tutorial uses LoadBalancer services to connect to Hazelcast from outside of the Kubernetes cluster. Therefore, it is essential to ensure that your Kubernetes cluster can assign public IPs to LoadBalancer services. This is particularly important if you are using a local Kubernetes cluster such as Minikube or Kind. + +== Introduction + +There are two available options for Expose Externally feature of Hazelcast Platform Operator: + +- *Unisocket* - client requests are load balanced between Hazelcast members. + +- *Smart* - client connects to all members and sends requests directly to the members owning the data. + +Let’s see both approaches. + +== Unisocket + +The first option is to use the `Unisocket` type. This option will use the standard Kubernetes mechanism that automatically load balances the traffic to Hazelcast members. + +.Hazelcast Unisocket Client +image::unisocket.jpg[Hazelcast Unisocket Client] + +=== Start the Hazelcast Cluster + +Run the following command to create the Hazelcast cluster with Expose Externally feature enabled using Unisocket type. + +[source, shell] +---- +kubectl apply -f - < +---- +-- + +Java:: ++ +-- +[source, java] +---- +include::ROOT:example$/operator-expose-externally/java-unisocket/src/main/java/com/hazelcast/Main.java[] +---- +-- + +NodeJS:: ++ +-- +[source, javascript] +---- +include::ROOT:example$/operator-expose-externally/nodejs-unisocket/client.js[] +---- +-- + +Go:: ++ +-- +[source, go] +---- +include::ROOT:example$/operator-expose-externally/go-unisocket/main.go[] +---- +-- + +Python:: ++ +-- +[source, python] +---- +include::ROOT:example$/operator-expose-externally/python-unisocket/main.py[] +---- +-- +.NET:: ++ +-- +[source, cs] +---- +include::ROOT:example$/operator-expose-externally/dotnet-unisocket/csharp_example.cs[] +---- +-- +==== + +Now you can start the application. + +[tabs] +==== + +CLC:: ++ +-- +Run the following command to fill a map. 
+ +[source, bash] +---- +for i in {1..10}; +do + clc -c hz map set --name map1 key-$i value-$i; +done +---- + +Run the following command to check the map size. + +[source, bash] +---- +clc -c hz map size --name map1 +---- +-- + +Java:: ++ +-- +[source, bash] +---- +cd java-unisocket +mvn package +java -jar target/*jar-with-dependencies*.jar +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +NodeJS:: ++ +-- +[source, bash] +---- +cd nodejs-unisocket +npm install +npm start +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +Go:: ++ +-- +[source, bash] +---- +cd go-unisocket +go run main.go +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +Python:: ++ +-- +[source, bash] +---- +cd python-unisocket +pip install -r requirements.txt +python main.py +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- +.NET:: ++ +-- +[source, bash] +---- +cd dotnet-unisocket +dotnet build +dotnet run +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +==== + +== Smart Client + +The second option is to use the `Smart` type. With this option, each Hazelcast member will be exposed with its own service (it can be either `LoadBalancer` or `NodePort`). Hazelcast smart client is capable of mapping the given key with its owner member, which means that it sends the data directly to the member which contains the right data partition. + +.Hazelcast Smart Client +image::smart.jpg[Hazelcast Smart Client] + +=== Start the Hazelcast Cluster + +Run the following command to create the Hazelcast cluster with Expose Externally feature enabled using Smart type. + +[source, shell] +---- +kubectl apply -f - < +---- +-- + +Java:: ++ +-- +[source, java] +---- +include::ROOT:example$/operator-expose-externally/java/src/main/java/com/hazelcast/Main.java[] +---- +-- + +NodeJS:: ++ +-- +[source, javascript] +---- +include::ROOT:example$/operator-expose-externally/nodejs/client.js[] +---- +-- + +Go:: ++ +-- +[source, go] +---- +include::ROOT:example$/operator-expose-externally/go/main.go[] +---- +-- + +Python:: ++ +-- +[source, python] +---- +include::ROOT:example$/operator-expose-externally/python/main.py[] +---- +-- +.Net:: ++ +-- +[source, cs] +---- +include::ROOT:example$/operator-expose-externally/dotnet-unisocket/csharp_example.cs[] +---- +-- +==== + + +Now you can start the application. + +[tabs] +==== + +CLC:: ++ +-- +Run the following command to fill a map. + +[source, bash] +---- +for i in {1..10}; +do + clc -c hz map set --name map1 key-$i value-$i; +done +---- + +Run the following command to check the map size. 
+ +[source, bash] +---- +clc -c hz map size --name map1 +---- +-- + +Java:: ++ +-- +[source, bash] +---- +cd java +mvn package +java -jar target/*jar-with-dependencies*.jar +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +NodeJS:: ++ +-- +[source, bash] +---- +cd nodejs +npm install +npm start +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +Go:: ++ +-- +[source, bash] +---- +cd go +go run main.go +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +Python:: ++ +-- +[source, bash] +---- +cd python +pip install -r requirements.txt +python main.py +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- +.NET:: ++ +-- +[source, bash] +---- +cd dotnet +dotnet build +dotnet run +---- + +You should see the following output. + +[source, shell] +---- +Successful connection! +Starting to fill the map with random entries. +Current map size: 2 +Current map size: 3 +Current map size: 4 +.... +.... +---- +-- + +==== + +== Clean Up + +To clean up the created resources remove the Hazelcast Custom Resource. + +[source, shell] +---- +kubectl delete hazelcast my-hazelcast +---- + +== See Also + +- xref:operator:ROOT:index.adoc[] +- xref:kubernetes-external-client.adoc[] diff --git a/docs/modules/tutorials/pages/java-client-getting-started.adoc b/docs/modules/tutorials/pages/java-client-getting-started.adoc new file mode 100644 index 000000000..2e95fa8bb --- /dev/null +++ b/docs/modules/tutorials/pages/java-client-getting-started.adoc @@ -0,0 +1,435 @@ += Getting Started with the Hazelcast Java Client +:description: This tutorial will get you started with the Hazelcast Java client and manipulate a map. + +== What You'll Learn + +{description} + +== Before you Begin + +Before starting this tutorial, make sure the following prerequisites are met: + +* JDK 11.0 or above +* https://hazelcast.com/products/viridian/[Hazelcast Viridian Cloud Account] +* An IDE + +== Start a Hazelcast Viridian Cloud Cluster + +1. Sign up for a Hazelcast Viridian Cloud account (free trial is available). +2. Log in to your Hazelcast Viridian Cloud account and start your trial by filling in the welcome questionnaire. +3. A Viridian cluster will be created automatically when you start your trial. +4. Press the Connect Cluster dialog and switch over to the Advanced setup tab for connection information needed below. +5. From the Advanced setup tab, download the keystore files and take note of your Cluster ID, Discovery Token and Password as you will need them later. 
+
+== Setup a Hazelcast Client
+
+Create a new folder and navigate to it:
+
+[source]
+----
+mkdir hazelcast-java-example
+cd hazelcast-java-example
+----
+
+Download the latest version of Hazelcast Enterprise zip slim from https://hazelcast.com/get-started/download/[here] and extract the Hazelcast Enterprise jar into this directory:
+
+[source]
+----
+hazelcast-enterprise-5.3.1.jar
+----
+
+Extract the keystore files you downloaded from Viridian into this directory. The files you need for this tutorial are:
+
+[source,bash]
+----
+client.keystore
+client.pfx
+client.truststore
+----
+
+Review the https://docs.hazelcast.com/hazelcast/5.3/clients/java#hide-nav[Java client documentation] to better understand what is possible.
+
+== Understanding the Java Client
+
+The following section creates and starts a Hazelcast client with the default configuration, connects it to your Viridian cluster, and then shuts the client down.
+
+Create a Java file named “Example.java” and put the following code inside it:
+
+[source,java]
+----
+import java.util.Properties;
+
+import com.hazelcast.client.HazelcastClient;
+import com.hazelcast.client.config.ClientConfig;
+import com.hazelcast.config.SSLConfig;
+import com.hazelcast.core.HazelcastInstance;
+
+public class Example {
+
+    public static void main(String[] args) throws Exception {
+        ClientConfig config = new ClientConfig();
+
+        // Your Viridian cluster name.
+        config.setClusterName("");
+
+        // Your discovery token to connect Viridian cluster.
+        config.getNetworkConfig().getCloudConfig()
+                .setDiscoveryToken("")
+                .setEnabled(true);
+
+        // Configure SSL
+        ClassLoader classLoader = Example.class.getClassLoader();
+        Properties props = new Properties();
+        props.setProperty("javax.net.ssl.keyStore", classLoader.getResource("client.keystore").toURI().getPath());
+        props.setProperty("javax.net.ssl.keyStorePassword", "");
+        props.setProperty("javax.net.ssl.trustStore",
+                classLoader.getResource("client.truststore").toURI().getPath());
+        props.setProperty("javax.net.ssl.trustStorePassword", "");
+        config.getNetworkConfig().setSSLConfig(new SSLConfig().setEnabled(true).setProperties(props));
+
+        // Create client
+        HazelcastInstance client = HazelcastClient.newHazelcastClient(config);
+
+        System.out.println("Welcome to your Hazelcast Viridian Cluster!");
+
+        client.shutdown();
+    }
+}
+----
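+
+The tutorial doesn't use a build tool, so as a rough sketch you can compile and run the example directly against the Enterprise jar you downloaded. The jar name below is the one extracted above; adjust it to your version, and on Windows use `;` instead of `:` as the classpath separator:
+
+[source,bash]
+----
+javac -cp hazelcast-enterprise-5.3.1.jar Example.java
+java -cp .:hazelcast-enterprise-5.3.1.jar Example
+----
+
+Keeping the keystore files in the working directory matters here: the example loads them from the classpath, and `.` is on the classpath in the second command.
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that lets you interact with Hazelcast much like any other datastore.
+
+In the following example, we will create a map and insert entries into it, where the keys are ids and the values are objects representing a city.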
+
+[source,java]
+----
+import java.util.Properties;
+
+import com.hazelcast.client.HazelcastClient;
+import com.hazelcast.client.config.ClientConfig;
+import com.hazelcast.config.SSLConfig;
+import com.hazelcast.core.HazelcastInstance;
+
+import com.hazelcast.nio.serialization.compact.CompactReader;
+import com.hazelcast.nio.serialization.compact.CompactSerializer;
+import com.hazelcast.nio.serialization.compact.CompactWriter;
+import com.hazelcast.sql.SqlResult;
+import com.hazelcast.sql.SqlRow;
+import com.hazelcast.sql.SqlService;
+
+public class Example {
+
+    public final class CityDTO {
+
+        private final String country;
+
+        private final String city;
+
+        private final int population;
+
+        public CityDTO(String country, String city, int population) {
+            this.country = country;
+            this.city = city;
+            this.population = population;
+        }
+
+        public String getCountry() {
+            return country;
+        }
+
+        public String getCity() {
+            return city;
+        }
+
+        public int getPopulation() {
+            return population;
+        }
+    }
+
+    public final class CitySerializer implements CompactSerializer<CityDTO> {
+        @Override
+        public CityDTO read(CompactReader compactReader) {
+            return new CityDTO(compactReader.readString("country"),
+                               compactReader.readString("city"),
+                               compactReader.readInt32("population"));
+        }
+
+        @Override
+        public void write(CompactWriter compactWriter, CityDTO city) {
+            compactWriter.writeString("country", city.getCountry());
+            compactWriter.writeString("city", city.getCity());
+            compactWriter.writeInt32("population", city.getPopulation());
+        }
+
+        @Override
+        public String getTypeName() {
+            return "CityDTO";
+        }
+
+        @Override
+        public Class<CityDTO> getCompactClass() {
+            return CityDTO.class;
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        ClientConfig config = new ClientConfig();
+
+        // Connection details for cluster
+        config.setClusterName("");
+
+        config.getNetworkConfig().getCloudConfig()
+                .setDiscoveryToken("")
+                .setEnabled(true);
+
+        ClassLoader classLoader = Example.class.getClassLoader();
+        Properties props = new Properties();
+        props.setProperty("javax.net.ssl.keyStore", classLoader.getResource("client.keystore").toURI().getPath());
+        props.setProperty("javax.net.ssl.keyStorePassword", "");
+        props.setProperty("javax.net.ssl.trustStore", classLoader.getResource("client.truststore").toURI().getPath());
+        props.setProperty("javax.net.ssl.trustStorePassword", "");
+        config.getNetworkConfig().setSSLConfig(new SSLConfig().setEnabled(true).setProperties(props));
+
+        // Register Compact Serializers
+        config.getSerializationConfig().getCompactSerializationConfig()
+                .addSerializer(new Example().new CitySerializer());
+
+        // Connect to your Hazelcast Cluster
+        HazelcastInstance client = HazelcastClient.newHazelcastClient(config);
+
+        try {
+            // Create a map on the cluster
+            createMapping(client.getSql());
+
+            // Add some data
+            insertCities(client);
+
+            // Output the data
+            fetchCities(client.getSql());
+
+        } finally {
+            client.shutdown();
+        }
+    }
+
+    private static void createMapping(SqlService sqlService) {
+        // See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps#compact-objects
+        System.out.print("\nCreating the mapping...");
+
+        String mappingSql = ""
+                + "CREATE OR REPLACE MAPPING cities("
+                + "     __key INT,"
+                + "     country VARCHAR,"
+                + "     city VARCHAR,"
+                + "     population INT"
+                + ") TYPE IMap"
+                + " OPTIONS ("
+                + "     'keyFormat' = 'int',"
+                + "     'valueFormat' = 'compact',"
+                + "     'valueCompactTypeName' = 'Example$CityDTO'"
+                + " )";
+
+        try (SqlResult ignored = sqlService.execute(mappingSql)) {
+            System.out.print("OK.");
+        } catch (Exception ex) {
+            System.out.print("FAILED. " + ex.getMessage());
+        }
+    }
+
+    private static void insertCities(HazelcastInstance client) {
+        try {
+            String deleteQuery = "DELETE from cities";
+
+            String insertQuery = "INSERT INTO cities "
+                    + "(__key, city, country, population) VALUES"
+                    + "(1, 'London', 'United Kingdom', 9540576),"
+                    + "(2, 'Manchester', 'United Kingdom', 2770434),"
+                    + "(3, 'New York', 'United States', 19223191),"
+                    + "(4, 'Los Angeles', 'United States', 3985520),"
+                    + "(5, 'Istanbul', 'Türkiye', 15636243),"
+                    + "(6, 'Ankara', 'Türkiye', 5309690),"
+                    + "(7, 'Sao Paulo ', 'Brazil', 22429800)";
+
+            System.out.print("\nInserting data...");
+            client.getSql().execute(deleteQuery);
+            client.getSql().execute(insertQuery);
+            System.out.print("OK.");
+        } catch (Exception ex) {
+            System.out.print("FAILED. " + ex.getMessage());
+        }
+    }
+
+    private static void fetchCities(SqlService sqlService) {
+        System.out.print("\nFetching cities...");
+
+        try (SqlResult result = sqlService.execute("SELECT __key, this FROM cities")) {
+            System.out.print("OK.\n");
+            System.out.println("--Results of 'SELECT __key, this FROM cities'");
+
+            System.out.printf("| %4s | %20s | %20s | %15s |%n", "id", "country", "city", "population");
+            for (SqlRow row : result) {
+                int id = row.getObject("__key");
+                CityDTO cityDTO = row.getObject("this");
+                System.out.printf("| %4s | %20s | %20s | %15s |%n",
+                        id,
+                        cityDTO.getCountry(),
+                        cityDTO.getCity(),
+                        cityDTO.getPopulation()
+                );
+            }
+        } catch (Exception ex) {
+            System.out.print("FAILED. " + ex.getMessage());
+        }
+    }
+}
+----
+
+The output of this code is given below:
+
+[source,bash]
+----
+Creating the mapping...OK.
+Inserting data...OK.
+Fetching cities...OK.
+--Results of 'SELECT __key, this FROM cities'
+| id | country | city | population |
+| 2 | United Kingdom | Manchester | 2770434 |
+| 6 | Türkiye | Ankara | 5309690 |
+| 1 | United Kingdom | London | 9540576 |
+| 7 | Brazil | Sao Paulo | 22429800 |
+| 4 | United States | Los Angeles | 3985520 |
+| 5 | Türkiye | Istanbul | 15636243 |
+| 3 | United States | New York | 19223191 |
+----
+
+NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to insertion order.
+
+== Understanding the Hazelcast IMap API
+
+A Hazelcast Map is a distributed key-value store, similar to a Java `Map`. You can store key-value pairs in a Hazelcast Map.
+
+In the following example, we will work with map entries where the keys are ids and the values are defined as a string representing a city name.
+
+[source,java]
+----
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import com.hazelcast.client.HazelcastClient;
+import com.hazelcast.client.config.ClientConfig;
+import com.hazelcast.config.SSLConfig;
+import com.hazelcast.core.HazelcastInstance;
+import com.hazelcast.map.IMap;
+
+public class Example {
+
+    public static void main(String[] args) throws Exception {
+        ClientConfig config = new ClientConfig();
+
+        // Your Viridian cluster name.
+        config.setClusterName("");
+
+        // Your discovery token to connect Viridian cluster.
+        config.getNetworkConfig().getCloudConfig()
+                .setDiscoveryToken("")
+                .setEnabled(true);
+
+        // Configure SSL
+        ClassLoader classLoader = Example.class.getClassLoader();
+        Properties props = new Properties();
+        props.setProperty("javax.net.ssl.keyStore", classLoader.getResource("client.keystore").toURI().getPath());
+        props.setProperty("javax.net.ssl.keyStorePassword", "");
+        props.setProperty("javax.net.ssl.trustStore", classLoader.getResource("client.truststore").toURI().getPath());
+        props.setProperty("javax.net.ssl.trustStorePassword", "");
+        config.getNetworkConfig().setSSLConfig(new SSLConfig().setEnabled(true).setProperties(props));
+
+        // Create client
+        HazelcastInstance client = HazelcastClient.newHazelcastClient(config);
+
+        try {
+            // Create a map on the cluster
+            IMap<Integer, String> citiesMap = client.getMap("cities");
+
+            // Clear the map
+            citiesMap.clear();
+
+            // Add some data
+            citiesMap.put(1, "London");
+            citiesMap.put(2, "New York");
+            citiesMap.put(3, "Tokyo");
+
+            // Output the data
+            Set<Map.Entry<Integer, String>> entries = citiesMap.entrySet();
+
+            for (Map.Entry<Integer, String> entry : entries)
+            {
+                System.out.println(entry.getKey() + " -> " + entry.getValue());
+            }
+        } finally {
+            client.shutdown();
+        }
+    }
+}
+----
+
+The following line returns a map proxy object for the `cities` map:
+
+[source,java]
+----
+            // Create a map on the cluster
+            IMap<Integer, String> citiesMap = client.getMap("cities");
+----
+
+If `cities` doesn't exist, it will be automatically created. All the clients connected to the same cluster will have access to the same map.
+
+With these lines, the client adds data to the `cities` map. The first parameter is the key of the entry, the second one is the value.
+
+[source,java]
+----
+            // Add some data
+            citiesMap.put(1, "London");
+            citiesMap.put(2, "New York");
+            citiesMap.put(3, "Tokyo");
+----
+
+Then, we get the data using the `entrySet()` method and iterate over the results.
+
+[source,java]
+----
+            // Output the data
+            Set<Map.Entry<Integer, String>> entries = citiesMap.entrySet();
+
+            for (Map.Entry<Integer, String> entry : entries)
+            {
+                System.out.println(entry.getKey() + " -> " + entry.getValue());
+            }
+----
+
+The output of this code is given below:
+
+[source,bash]
+----
+2 -> New York
+1 -> London
+3 -> Tokyo
+----
+
+NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to entry order.
+
+
+== Summary
+
+In this tutorial, you learned how to get started with the Hazelcast Java Client, connect to a Viridian instance and put data into a distributed map.
+
+== See Also
+
+There are a lot of things that you can do with the Java Client. For more, such as how you can query a map with predicates and SQL,
+check out our https://github.com/hazelcast/hazelcast[Hazelcast repository] and our https://docs.hazelcast.com/hazelcast/5.3/clients/java#hide-nav[Java client documentation] to better understand what is possible.
+
+If you have any questions, suggestions, or feedback please do not hesitate to reach out to us via https://slack.hazelcast.com/[Hazelcast Community Slack].
+Also, please take a look at https://github.com/hazelcast/hazelcast/issues[the issue list] if you would like to contribute to the client.
+
+
diff --git a/docs/modules/tutorials/pages/join-two-streams.adoc b/docs/modules/tutorials/pages/join-two-streams.adoc
new file mode 100644
index 000000000..6f442afa8
--- /dev/null
+++ b/docs/modules/tutorials/pages/join-two-streams.adoc
@@ -0,0 +1,361 @@
+= Stream-To-Stream Joins with SQL
+:description: Learn how to join two streams of data and process the results, using SQL.
+ +{description} + +[.interactive-button] +link:https://gitpod.io/#https://github.com/hazelcast-guides/stream-to-stream-joins[Try it in your browser,window=_blank] + +== Context + +A stream of data is an ongoing delivery of data events. These events are often of the same type. For example, a stream may contain click events on a website. + +If you have two or more streams of related data, you can join them together on a related field, process them, and store the result. + +== Example Use Case + +You have two streams. One stream contains events for new orders and the other contains events for shipped orders. You need to join these two streams to find out which orders have been successfully shipped within seven days, and from which warehouse they were shipped. + +== Before you Begin + +Before starting this tutorial, make sure that you have the following prerequisites: + +* link:https://docs.docker.com/compose/install/[Docker Compose] +* link:https://git-scm.com/book/en/v2/Getting-Started-Installing-Git[Git] + +== Step 1. Clone the Project + +To set up the project, you need to download the code from GitHub. + +. Clone the GitHub repository. ++ +[tabs] +==== +HTTPS:: ++ +-- +```bash +git clone https://github.com/hazelcast-guides/stream-to-stream-joins.git +cd stream-to-stream-joins +``` +-- +SSH:: ++ +-- +```bash +git clone git@github.com:hazelcast-guides/stream-to-stream-joins.git +cd stream-to-stream-joins +``` +-- +==== + +== Step 2. Start the Docker Containers + +In this step, you'll use Docker Compose to start all the Docker containers, including a Kafka broker, Hazelcast Platform, and Management Center. + +```bash +docker compose up -d +``` + +You should see the following: + +``` +[+] Running 4/4 + ⠿ Container zookeeper Started 0.7s + ⠿ Container management-center Started 0.6s + ⠿ Container hazelcast Started 0.7s + ⠿ Container broker Started 1.2s +``` + +The Docker containers are running in detached mode. You can see that they are running, using the following command: + +``` +docker ps +``` + +To see the logs of your Hazelcast member, use the following command: + +``` +docker logs hazelcast +``` + +You should see that you have a single member running in the cluster. + +``` +Members {size:1, ver:1} [ + Member [172.19.0.4]:5701 - 15116025-342b-43c0-83f7-a2a90f0281ce this +] +``` + +== Step 3. Create Two Kafka Topics + +To create the Kafka topics, you'll use the `kafka-console-producer` script that's built into the Kafka broker. + +. Create the `orders` topic and add some records to it. ++ +```bash +docker exec -i broker kafka-console-producer --broker-list broker:9092 --topic orders < orders.jsonl +``` + +. Create the `shipments` topic and add some records to it. ++ +```bash +docker exec -i broker kafka-console-producer --broker-list broker:9092 --topic shipments < shipments.jsonl +``` + +== Step 4. Create Mappings to the Kafka Topics + +In this step, you'll use the SQL shell in Hazelcast to create a mapping to the Kafka topics. With this mapping, Hazelcast will be able to receive the event streams. + +. Open the SQL shell. ++ +```bash +docker exec -it hazelcast hz-cli sql +``` + +. Create a mapping to the `orders` topic. 
++
+```sql
+CREATE OR REPLACE MAPPING orders(
+    id INT,
+    order_ts TIMESTAMP WITH TIME ZONE,
+    total_amount DOUBLE,
+    customer_name VARCHAR)
+TYPE Kafka
+OPTIONS (
+    'keyFormat' = 'int', <1>
+    'valueFormat' = 'json-flat', <2>
+    'auto.offset.reset' = 'earliest', <3>
+    'bootstrap.servers' = 'broker:9092'); <4>
+```
++
+<1> The Kafka record key, which is the ID of the orders and shipments.
+<2> Map the Kafka records to JSON, using the `json-flat` format. This format maps each top-level JSON field to its own column.
+<3> Tell Hazelcast to read from the beginning of the topic so that you can read the values that you already added to it.
+<4> The address of the Kafka broker that Hazelcast connects to.
+
+. Make sure that the mapping is correct by running a streaming query on the topic.
++
+```sql
+SELECT * FROM orders;
+```
++
+```
++------------+-------------------------+-------------------------+--------------------+
+| id|order_ts | total_amount|customer_name |
++------------+-------------------------+-------------------------+--------------------+
+| 1|2022-03-29T06:01:18Z | 133548.84|Amal |
+| 2|2022-03-29T17:02:20Z | 164839.31|Alex |
+| 3|2022-03-29T13:44:10Z | 90427.66|Hao |
+| 4|2022-03-29T11:58:25Z | 33462.11|Cruz |
+```
+
+. Press kbd:[Ctrl+C] to exit the streaming query.
+
+. Create a mapping to the `shipments` topic.
++
+```sql
+CREATE OR REPLACE MAPPING shipments(
+    id VARCHAR,
+    ship_ts TIMESTAMP WITH TIME ZONE,
+    order_id INT,
+    warehouse VARCHAR
+)
+TYPE Kafka
+OPTIONS (
+    'keyFormat' = 'varchar',
+    'valueFormat' = 'json-flat',
+    'auto.offset.reset' = 'earliest',
+    'bootstrap.servers' = 'broker:9092');
+```
+
+. Make sure that the mapping is correct by running a streaming query on the topic.
++
+```sql
+SELECT * FROM shipments;
+```
++
+```
++--------------------+-------------------------+------------+--------------------+
+|id |ship_ts | order_id|warehouse |
++--------------------+-------------------------+------------+--------------------+
+|ship-ch83360 |2022-03-31T18:13:39Z | 1|UPS |
+|ship-xf72808 |2022-03-31T02:04:13Z | 2|UPS |
+|ship-kr47454 |2022-03-31T20:47:09Z | 3|DHL |
+```
+
+. Press kbd:[Ctrl+C] to exit the streaming query.
+
+== Step 5. Join the Two Streams
+
+In this step, you'll join the two streams to get insights about shipments that are sent within 7 days of the order.
+
+You can join streams in Hazelcast only on a table that defines an allowed lag for late events. Hazelcast drops events that are later than the defined lag and does not include them in the result set.
+
+. Drop late events when they are one minute or later behind the current latest event.
++
+```sql
+CREATE OR REPLACE VIEW shipments_ordered AS
+  SELECT * FROM TABLE(IMPOSE_ORDER(
+    TABLE shipments,
+    DESCRIPTOR(ship_ts), <1>
+    INTERVAL '1' MINUTE)); <2>
+```
++
+```sql
+CREATE OR REPLACE VIEW orders_ordered AS
+  SELECT * FROM TABLE(IMPOSE_ORDER(
+    TABLE orders,
+    DESCRIPTOR(order_ts), <1>
+    INTERVAL '1' MINUTE)); <2>
+```
++
+<1> The field that Hazelcast reads to compare to the lag. This field must be a timestamp.
+<2> An allowed lag of one minute. For example, if the latest `ship_ts` seen so far is 12:05:00, a late event stamped 12:03:30 is dropped from the stream.
+
+. Join the two streams. This query finds orders that were shipped within 7 days of being placed.
++
+```sql
+SELECT o.id AS order_id,
+    o.order_ts,
+    o.total_amount,
+    o.customer_name,
+    s.id AS shipment_id,
+    s.ship_ts,
+    s.warehouse
+FROM orders_ordered o JOIN shipments_ordered s <1>
+ON o.id = s.order_id AND s.ship_ts BETWEEN o.order_ts AND o.order_ts + INTERVAL '7' DAYS; <2>
+```
++
+<1> The inner join makes sure that results are output only for orders that have successfully shipped. The query must find a match on both sides of the join.
+<2> A window duration of seven days ignores orders whose shipments don’t occur within 7 days of purchasing. Another added benefit of limiting this query to 7 days of data is that it limits the amount of memory that the query requires.
+
+```
++------------+-------------------------+-------------------------+--------------------+--------------------+-------------------------+--------------------+
+| order_id|order_ts | total_amount|customer_name |shipment_id |ship_ts |warehouse |
++------------+-------------------------+-------------------------+--------------------+--------------------+-------------------------+--------------------+
+| 1|2022-03-29T06:01:18Z | 133548.84|Amal |ship-ch83360 |2022-03-31T18:13:39Z |UPS |
+| 2|2022-03-29T17:02:20Z | 164839.31|Alex |ship-xf72808 |2022-03-31T02:04:13Z |UPS |
+| 3|2022-03-29T13:44:10Z | 90427.66|Hao |ship-kr47454 |2022-03-31T20:47:09Z |DHL |
+```
+
+== Step 6. Create a Materialized View
+
+In this step, you'll define a job to run this streaming query in the background and store the results in a materialized view, using a Hazelcast map.
+
+. Create a mapping to a Hazelcast map called `orders_shipped_within_7_days`.
++
+```sql
+CREATE OR REPLACE MAPPING orders_shipped_within_7_days(
+    __key INT, <1>
+    order_ts TIMESTAMP WITH TIME ZONE, <2>
+    total_amount DOUBLE,
+    customer_name VARCHAR,
+    shipment_id VARCHAR,
+    ship_ts TIMESTAMP WITH TIME ZONE,
+    warehouse VARCHAR
+)
+TYPE IMAP
+OPTIONS (
+    'keyFormat' = 'int',
+    'valueFormat' = 'json-flat');
+```
++
+<1> The first column must be named `__key`. This column is mapped to the key of map entries.
+<2> The other columns must appear in the same order as the streaming query results so that the data types are mapped correctly.
+
+. Create the job.
++
+```sql
+CREATE JOB get_orders_shipped_within_7_days AS
+  SINK INTO orders_shipped_within_7_days <1>
+  SELECT o.id AS __key, <2>
+      o.order_ts,
+      o.total_amount,
+      o.customer_name,
+      s.id AS shipment_id,
+      s.ship_ts,
+      s.warehouse
+  FROM orders_ordered o JOIN shipments_ordered s
+  ON o.id = s.order_id AND s.ship_ts BETWEEN o.order_ts AND o.order_ts + INTERVAL '7' DAYS;
+```
++
+<1> Insert the results into the `orders_shipped_within_7_days` map.
+<2> Make sure that the selected fields are in the same order as the mapping to the `orders_shipped_within_7_days` map.
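+
+. (Optional) Check that the job is running. The `SHOW JOBS` statement lists the names of the jobs on the cluster, so it's a quick sanity check before querying the results. This step is a suggested extra and isn't required by the rest of the tutorial.
++
+```sql
+SHOW JOBS;
+```
+
+. Query the map to make sure that the job is working.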
++
+```sql
+SELECT * FROM orders_shipped_within_7_days;
+```
+
+You should see the following:
+
+```
++------------+-------------------------+-------------------------+--------------------+--------------------+-------------------------+--------------------+
+| __key|order_ts | total_amount|customer_name |shipment_id |ship_ts |warehouse |
++------------+-------------------------+-------------------------+--------------------+--------------------+-------------------------+--------------------+
+| 2|2022-03-29T17:02:20Z | 164839.31|Alex |ship-xf72808 |2022-03-31T02:04:13Z |UPS |
+| 1|2022-03-29T06:01:18Z | 133548.84|Amal |ship-ch83360 |2022-03-31T18:13:39Z |UPS |
+| 3|2022-03-29T13:44:10Z | 90427.66|Hao |ship-kr47454 |2022-03-31T20:47:09Z |DHL |
++------------+-------------------------+-------------------------+--------------------+--------------------+-------------------------+--------------------+
+```
+
+If you left this query running, it would continue to add new results for orders shipped within 7 days. You can connect your applications to the Hazelcast cluster and query this map to get further insights.
+
+== Step 7. Clean Up
+
+Stop and remove your Docker containers.
+
+```bash
+docker compose stop
+docker compose rm
+```
+
+== Summary
+
+In this tutorial, you learned:
+
+- How to get deeper insights from two related streams by joining them together.
+- How to run the streaming query in the background and store the results in a materialized view, using a job.
+
+== Next Steps
+
+.Run on Hazelcast {hazelcast-cloud}
+[%collapsible]
+====
+Learn how to xref:create-materialized-view-from-kafka.adoc[run streaming queries on Hazelcast {hazelcast-cloud}].
+====
+
+.Persist mappings and maps
+[%collapsible]
+====
+By default, mappings and maps are not persisted. When you stop your cluster, all mappings and map data are deleted.
+To persist this data, you can enable the xref:hazelcast:storage:persistence.adoc[Persistence] feature in the cluster configuration. Or, you can use Hazelcast {hazelcast-cloud}, which persists this data by default. For an introduction to querying Kafka streams in Hazelcast {hazelcast-cloud}, see xref:tutorials:ROOT:create-materialized-view-from-kafka.adoc[Query Streams from Confluent Cloud].
+====
+
+.Manage memory
+[%collapsible]
+====
+The materialized view would continue to store more and more results as new orders and shipment events are generated. To control the size of the map and the amount of memory it consumes, you can configure it with limits. See xref:hazelcast:data-structures:managing-map-memory.adoc[Managing Map Memory].
+====
+
+.Manage jobs
+[%collapsible]
+====
+To manage your streaming job, see xref:hazelcast:pipelines:job-management.adoc[].
+====
+
+.Explore Management Center
+[%collapsible]
+====
+To manage and monitor your cluster, you can use Management Center. This project runs Management Center at http://localhost:8080. See the xref:management-center:getting-started:overview.adoc[Management Center documentation] for details.
+====
+
+== See Also
+
+- xref:hazelcast:sql:sql-overview.adoc[SQL reference].
+
+- xref:hazelcast:sql:querying-streams.adoc[].
+
+- xref:hazelcast:sql:working-with-json.adoc[].
diff --git a/docs/modules/tutorials/pages/kubernetes.adoc b/docs/modules/tutorials/pages/kubernetes.adoc
new file mode 100644
index 000000000..32903462d
--- /dev/null
+++ b/docs/modules/tutorials/pages/kubernetes.adoc
@@ -0,0 +1,374 @@
+= Deploy Hazelcast using Kubernetes
+:description: In this tutorial, you will deploy a Hazelcast cluster to Kubernetes and connect it to a Kubernetes application.
+
+NOTE: Hazelcast now offers the Hazelcast Platform Operator. We recommend using this operator when working with Hazelcast in Kubernetes. For more info, see link:https://guides.hazelcast.org/kubernetes-external-client/[Connect External Hazelcast Client to Kubernetes].
+
+== What You’ll Learn
+
+{description}
+
+== Before you Begin
+
+* Up and running https://kubernetes.io/[Kubernetes] cluster (version 1.9 or higher)
+* Kubernetes command line tool, https://kubernetes.io/docs/tasks/tools/install-kubectl/[kubectl]
+* (optional) https://helm.sh/docs/intro/install/[Helm CLI]
+
+== Deploy Hazelcast cluster
+
+There are different ways of deploying Hazelcast to Kubernetes. For production environments, we recommend the Hazelcast Platform Operator or Helm because these methods provide the complete Hazelcast experience, including the following aspects:
+
+* Automated deployment including the Hazelcast Management Center tool
+* Automated lifecycle operations (scaling, upgrades) to avoid data loss
+* Role-based access control
+
+[tabs]
+====
+
+Helm::
++
+--
+[source, bash]
+----
+helm repo add hazelcast https://hazelcast-charts.s3.amazonaws.com/
+helm repo update
+helm install hz-hazelcast hazelcast/hazelcast
+----
+--
+
+Hazelcast Platform Operator::
++
+
+--
+[source, bash]
+----
+helm repo add hazelcast https://hazelcast-charts.s3.amazonaws.com/
+helm repo update
+helm install operator hazelcast/hazelcast-platform-operator --set installCRDs=true
+cat << EOF | kubectl apply -f -
+apiVersion: hazelcast.com/v1alpha1
+kind: Hazelcast
+metadata:
+  name: hz-hazelcast
+EOF
+----
+--
+
+Kubectl::
++
+
+--
+[source, bash]
+----
+kubectl apply -f https://raw.githubusercontent.com/hazelcast/hazelcast/master/kubernetes-rbac.yaml
+
+kubectl run hz-hazelcast-0 --image=hazelcast/hazelcast:$HAZELCAST_VERSION -l "role=hazelcast"
+kubectl run hz-hazelcast-1 --image=hazelcast/hazelcast:$HAZELCAST_VERSION -l "role=hazelcast"
+kubectl run hz-hazelcast-2 --image=hazelcast/hazelcast:$HAZELCAST_VERSION -l "role=hazelcast"
+
+kubectl create service clusterip hz-hazelcast --tcp=5701 -o yaml --dry-run=client | kubectl set selector --local -f - "role=hazelcast" -o yaml | kubectl create -f -
+----
+--
+
+====
+
+You can check that the Hazelcast cluster is up and running.
+
+[source, bash]
+----
+kubectl logs hz-hazelcast-0
+...
+Members {size:3, ver:3} [
+	Member [10.216.6.7]:5701 - 6d2100e0-8dcf-4e7c-ab40-8e98e23475e3 this
+	Member [10.216.5.6]:5701 - 5ab4d554-fd7d-4929-8475-0ddf79a21076
+	Member [10.216.8.6]:5701 - 7f7dd5f4-e732-4575-89d6-a6e823da38da
+]
+----
+
+At this point you have started a Hazelcast cluster with 3 members. It is exposed with a service called `hz-hazelcast`.
+
+== Configure Hazelcast client
+
+Use a Hazelcast client to connect to the running Hazelcast cluster from your applications.
+
+To use the Hazelcast client, add the Hazelcast client dependency to your application.
+
+[tabs]
+====
+
+Java::
++
+--
+.pom.xml
+[source, xml]
+----
+<dependency>
+    <groupId>com.hazelcast</groupId>
+    <artifactId>hazelcast</artifactId>
+    <version>${hazelcast.version}</version>
+</dependency>
+----
+--
+
+NodeJS::
++
+--
+[source, bash]
+----
+npm install hazelcast-client
+----
+--
+
+Python::
++
+--
+[source, bash]
+----
+pip install hazelcast-python-client
+----
+--
+
+C++::
++
+--
+[source, bash]
+----
+curl -Lo hazelcast-cpp-client.tar.gz https://github.com/hazelcast/hazelcast-cpp-client/archive/v${VERSION}.tar.gz
+tar xzf hazelcast-cpp-client.tar.gz
+----
+--
+
+Go::
++
+--
+[source, bash]
+----
+go get github.com/hazelcast/hazelcast-go-client@v1.0.0
+----
+--
+
+====
+
+Configure the Hazelcast client in your application to connect to the `hz-hazelcast` service.
+
+[tabs]
+====
+
+Java::
++
+--
+[source, java]
+----
+ClientConfig config = new ClientConfig();
+config.getNetworkConfig().addAddress("hz-hazelcast");
+----
+--
+
+NodeJS::
++
+--
+[source, javascript]
+----
+const { Client } = require('hazelcast-client');
+
+const clientConfig = {
+    network: {
+        clusterMembers: [
+            'hz-hazelcast'
+        ]
+    }
+};
+const client = await Client.newHazelcastClient(clientConfig);
+----
+--
+
+Python::
++
+--
+[source, python]
+----
+import hazelcast
+
+client = hazelcast.HazelcastClient(
+    cluster_members=["hz-hazelcast"],
+)
+----
+--
+
+C++::
++
+--
+[source, cpp]
+----
+#include <hazelcast/client/hazelcast_client.h>
+
+hazelcast::client::client_config config;
+config.get_network_config()
+      .add_address({"hz-hazelcast", 5701});
+auto hz = hazelcast::new_client(std::move(config)).get();
+----
+--
+
+Go::
++
+--
+[source, go]
+----
+import (
+	"context"
+	"log"
+
+	"github.com/hazelcast/hazelcast-go-client"
+)
+
+func main() {
+	config := hazelcast.Config{}
+	config.Cluster.Network.SetAddresses("hz-hazelcast:5701")
+	ctx := context.Background()
+	client, err := hazelcast.StartNewClientWithConfig(ctx, config)
+	if err != nil {
+		log.Fatal(err)
+	}
+	_ = client // use the client here
+}
+----
+--
+
+====
+
+Your application is now configured to automatically connect to the Hazelcast cluster once it's deployed to Kubernetes.
+
+== Deploy client application
+
+To deploy your application to the Kubernetes cluster, you first need to dockerize it.
+
+[tabs]
+====
+
+Java::
++
+--
+[source, bash]
+----
+docker build -t hazelcastguides/hazelcast-client java
+----
+--
+
+NodeJS::
++
+--
+[source, bash]
+----
+docker build -t hazelcastguides/hazelcast-client nodejs
+----
+--
+
+Python::
++
+--
+[source, bash]
+----
+docker build -t hazelcastguides/hazelcast-client python
+----
+--
+
+C++::
++
+--
+[source, bash]
+----
+docker build -t hazelcastguides/hazelcast-client cpp
+----
+--
+
+Go::
++
+--
+[source, bash]
+----
+docker build -t hazelcastguides/hazelcast-client go
+----
+--
+
+====
+
+[NOTE]
+====
+If you use a remote Kubernetes cluster and you want to build your own Docker image then make sure that you also push your Docker image into the Docker registry.
+====
+
+[source, bash]
+----
+kubectl run hazelcast-client --image=hazelcastguides/hazelcast-client
+----
+
+After a moment, check application logs to see it running in Kubernetes.
+
+[source, bash]
+----
+kubectl logs hazelcast-client
+...
+Members {size:3, ver:3} [
+	Member [10.216.6.7]:5701 - 6d2100e0-8dcf-4e7c-ab40-8e98e23475e3 this
+	Member [10.216.5.6]:5701 - 5ab4d554-fd7d-4929-8475-0ddf79a21076
+	Member [10.216.8.6]:5701 - 7f7dd5f4-e732-4575-89d6-a6e823da38da
+]
+...
+Successful connection!
+Starting to fill the map with random entries.
+Current map size: 71754
+Current map size: 71758
+Current map size: 71782
+Current map size: 71792
+...
+---- + +To remove the client application, execute the following command. + +[source, bash] +---- +kubectl delete pod hazelcast-client +---- + +== Tear down Hazelcast cluster + +To delete Hazelcast cluster, run the following commands. + +[tabs] +==== + +Helm:: ++ +-- +[source, bash] +---- +helm uninstall hz-hazelcast +---- +-- + +Hazelcast Platform Operator:: ++ + +-- +[source, bash] +---- +kubectl delete hazelcast hz-hazelcast +helm uninstall operator +---- +-- + +Kubectl:: ++ + +-- +[source, bash] +---- +kubectl delete service hz-hazelcast +kubectl delete pod hz-hazelcast-0 hz-hazelcast-1 hz-hazelcast-2 +kubectl delete -f https://raw.githubusercontent.com/hazelcast/hazelcast/master/kubernetes-rbac.yaml +---- +-- +==== + +== See Also + +* link:https://guides.hazelcast.org/kubernetes-hpa/[Deploy Hazelcast Cluster with Kubernetes HPA] +* link:https://guides.hazelcast.org/kubernetes-external-client/[Connect External Hazelcast Client to Kubernetes] +* link:https://guides.hazelcast.org/kubernetes-sidecar/[Use Hazelcast as Sidecar Container] \ No newline at end of file diff --git a/docs/modules/tutorials/pages/nodejs-client-getting-started.adoc b/docs/modules/tutorials/pages/nodejs-client-getting-started.adoc new file mode 100644 index 000000000..828ea4159 --- /dev/null +++ b/docs/modules/tutorials/pages/nodejs-client-getting-started.adoc @@ -0,0 +1,565 @@ += Getting Started with the Hazelcast Node.js Client +:description: This tutorial will get you started with the Hazelcast Node.js client and manipulate a map. + +== What You'll Learn + +{description} + +== Before you Begin + +* Node.js 10.4 or above +* https://hazelcast.com/products/viridian/[Hazelcast Viridian Cloud Account] +* A text editor or IDE + +== Start a Hazelcast Viridian Cloud Cluster + +1. Sign up for a Hazelcast Viridian Cloud account (free trial is available). +2. Log in to your Hazelcast Viridian Cloud account and start your trial by filling in the welcome questionnaire. +3. A Viridian cluster will be created automatically when you start your trial. +4. Press the Connect Cluster dialog and switch over to the Advanced setup tab for connection information needed below. +5. From the Advanced setup tab, download the keystore files and take note of your Cluster ID, Discovery Token and Password as you will need them later. + +== Setup a Hazelcast Client + +Create a new folder and navigate to it: + +[source] +---- +mkdir hazelcast-nodejs-example +cd hazelcast-nodejs-example +---- + +Initialize a new npm package and choose default values when asked: + +[source,bash] +---- +npm init +---- + +Install Hazelcast Node.js client's latest version: + +[source,bash] +---- +npm install --save hazelcast-client +---- + +Extract the keystore files you downloaded from Viridian into this directory. The files you need for this tutorial are: + +[source,bash] +---- +ca.pem +cert.pem +key.pem +---- + +== Understanding the Node.js Client + +The following section creates and starts a Hazelcast client with default configuration, connects to your Viridian cluster before shutting the client down at the end. 
+
+Create a JavaScript file named “index.js” and put the following code inside it:
+
+[source,javascript]
+----
+'use strict';
+
+const { Client } = require('hazelcast-client');
+const fs = require('fs');
+const path = require('path');
+const process = require('process');
+const sprintf = require('sprintf-js').sprintf;
+
+(async () => {
+
+    const client = await Client.newHazelcastClient({
+        clusterName: '',
+
+        // Connection details for cluster
+        network: {
+            hazelcastCloud: {
+                discoveryToken: '',
+            },
+
+            ssl: {
+                enabled: true,
+                sslOptions: {
+                    ca: [fs.readFileSync(path.resolve(path.join(__dirname, 'ca.pem')))],
+                    cert: [fs.readFileSync(path.resolve(path.join(__dirname, 'cert.pem')))],
+                    key: [fs.readFileSync(path.resolve(path.join(__dirname, 'key.pem')))],
+                    passphrase: '',
+                    checkServerIdentity: () => null
+                },
+            },
+        },
+
+        // Other environment properties
+        properties: {
+            'hazelcast.logging.level': 'WARN' // this property value is case-insensitive
+        },
+    });
+
+    process.stdout.write('Welcome to your Hazelcast Viridian Cluster!');
+
+    await client.shutdown();
+
+})().catch(err => {
+    process.stderr.write(`An error occurred: ${err}\n`);
+});
+----
+
+To run this Node.js script, use the following command:
+
+[source,bash]
+----
+node index.js
+----
+
+Most of the client methods return promises, so you can use the https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function[async/await] syntax,
+or the regular https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/then[then] / https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/catch[catch]
+syntax, too.
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that lets you interact with Hazelcast much like any other datastore.
+
+In the following example, we will create a map and insert entries into it, where the keys are ids and the values are objects representing a city.
+
+NOTE: The SSL certificate files used below are the ones you downloaded from Viridian earlier.
+
+[source,javascript]
+----
+'use strict';
+
+const { Client } = require('hazelcast-client');
+const fs = require('fs');
+const path = require('path');
+const process = require('process');
+const sprintf = require('sprintf-js').sprintf;
+
+class CityDTO {
+    constructor(city, country, population) {
+        this.city = city;
+        this.country = country;
+        this.population = population;
+    }
+}
+
+class CitySerializer {
+
+    getClass() {
+        return CityDTO;
+    }
+
+    getTypeName() {
+        return 'CityDTO';
+    }
+
+    write(writer, cityDTO) {
+        writer.writeString('city', cityDTO.city);
+        writer.writeString('country', cityDTO.country);
+        writer.writeInt32('population', cityDTO.population);
+    }
+
+    read(reader) {
+        const city = reader.readString('city');
+        const country = reader.readString('country');
+        const population = reader.readInt32('population');
+
+        return new CityDTO(city, country, population);
+    }
+}
+
+async function createMapping(client) {
+    process.stdout.write('Creating the mapping...');
+
+    // Mapping is required for your distributed map to be queried over SQL.
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that lets you interact with Hazelcast much like any other datastore.
+
+In the following example, we create a map and insert entries where the keys are IDs and the values are objects representing a city.
+
+NOTE: The SSL certificate files are available in the client download from Viridian.
+
+[source,javascript]
+----
+'use strict';
+
+const { Client } = require('hazelcast-client');
+const fs = require('fs');
+const path = require('path');
+const process = require('process');
+const sprintf = require('sprintf-js').sprintf;
+
+class CityDTO {
+    constructor(city, country, population) {
+        this.city = city;
+        this.country = country;
+        this.population = population;
+    }
+}
+
+class CitySerializer {
+
+    getClass() {
+        return CityDTO;
+    }
+
+    getTypeName() {
+        return 'CityDTO';
+    }
+
+    write(writer, cityDTO) {
+        writer.writeString('city', cityDTO.city);
+        writer.writeString('country', cityDTO.country);
+        writer.writeInt32('population', cityDTO.population);
+    }
+
+    read(reader) {
+        const city = reader.readString('city');
+        const country = reader.readString('country');
+        const population = reader.readInt32('population');
+
+        return new CityDTO(city, country, population);
+    }
+}
+
+async function createMapping(client) {
+    process.stdout.write('Creating the mapping...');
+
+    // Mapping is required for your distributed map to be queried over SQL.
+    // See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps
+    const mappingQuery = `
+        CREATE OR REPLACE MAPPING
+        cities (
+            __key INT,
+            country VARCHAR,
+            city VARCHAR,
+            population INT) TYPE IMAP
+        OPTIONS (
+            'keyFormat' = 'int',
+            'valueFormat' = 'compact',
+            'valueCompactTypeName' = 'CityDTO')
+    `;
+
+    await client.getSql().execute(mappingQuery);
+    process.stdout.write('OK.\n');
+}
+
+async function populateCities(client) {
+    process.stdout.write('Inserting data...');
+
+    // Clear the map first so the example can be re-run, then insert the entries.
+    const insertQuery = `
+        INSERT INTO cities
+        (__key, city, country, population) VALUES
+        (1, 'London', 'United Kingdom', 9540576),
+        (2, 'Manchester', 'United Kingdom', 2770434),
+        (3, 'New York', 'United States', 19223191),
+        (4, 'Los Angeles', 'United States', 3985520),
+        (5, 'Istanbul', 'Türkiye', 15636243),
+        (6, 'Ankara', 'Türkiye', 5309690),
+        (7, 'Sao Paulo ', 'Brazil', 22429800)
+    `;
+
+    try {
+        await client.getSql().execute('DELETE FROM cities');
+        await client.getSql().execute(insertQuery);
+
+        process.stdout.write('OK.\n');
+    } catch (error) {
+        process.stderr.write(`FAILED: ${error}\n`);
+    }
+}
+
+async function fetchCities(client) {
+    process.stdout.write('Fetching cities...');
+
+    const sqlResultAll = await client.getSql().execute('SELECT __key, this FROM cities', [], { returnRawResult: true });
+
+    process.stdout.write('OK.\n');
+    process.stdout.write("--Results of 'SELECT __key, this FROM cities'\n");
+    process.stdout.write(sprintf('| %4s | %20s | %20s | %15s |\n', 'id', 'country', 'city', 'population'));
+
+    // The Node.js client deserializes Compact values lazily. Reading one map value
+    // first fetches the CityDTO schema so that the rows below can be deserialized.
+    const cities = await client.getMap('cities');
+    await cities.get(1);
+
+    for await (const row of sqlResultAll) {
+        const id = row.getObject('__key');
+        const cityDTO = row.getObject('this');
+        process.stdout.write(sprintf('| %4d | %20s | %20s | %15d |\n', id, cityDTO.country, cityDTO.city, cityDTO.population));
+    }
+
+    process.stdout.write('\n!! Hint !! You can execute your SQL queries on your Viridian cluster using Management Center. \n 1. Go to "Management Center" of your Hazelcast Viridian cluster. \n 2. Open the "SQL Browser". \n 3. Try to execute "SELECT * FROM cities".\n');
+}
+
+///////////////////////////////////////////////////////
+
+(async () => {
+
+    const client = await Client.newHazelcastClient({
+        clusterName: '',
+
+        // Connection details for cluster
+        network: {
+            hazelcastCloud: {
+                discoveryToken: '',
+            },
+
+            ssl: {
+                enabled: true,
+                sslOptions: {
+                    ca: [fs.readFileSync(path.resolve(path.join(__dirname, 'ca.pem')))],
+                    cert: [fs.readFileSync(path.resolve(path.join(__dirname, 'cert.pem')))],
+                    key: [fs.readFileSync(path.resolve(path.join(__dirname, 'key.pem')))],
+                    passphrase: '',
+                    checkServerIdentity: () => null
+                },
+            },
+        },
+
+        // Register Compact serializers
+        serialization: {
+            compact: {
+                serializers: [new CitySerializer()],
+            },
+            defaultNumberType: 'integer',
+        },
+
+        // Other environment properties
+        properties: {
+            'hazelcast.logging.level': 'WARN' // this property value is case-insensitive
+        },
+    });
+
+    await createMapping(client);
+    await populateCities(client);
+    await fetchCities(client);
+
+    await client.shutdown();
+
+})().catch(err => {
+    process.stderr.write(`An error occurred: ${err}\n`);
+});
+----
+
+The output of this code is given below:
+
+[source,bash]
+----
+Creating the mapping...OK.
+Inserting data...OK.
+Fetching cities...OK.
+--Results of 'SELECT __key, this FROM cities'
+| id | country | city | population |
+| 2 | United Kingdom | Manchester | 2770434 |
+| 6 | Türkiye | Ankara | 5309690 |
+| 1 | United Kingdom | London | 9540576 |
+| 7 | Brazil | Sao Paulo | 22429800 |
+| 4 | United States | Los Angeles | 3985520 |
+| 5 | Türkiye | Istanbul | 15636243 |
+| 3 | United States | New York | 19223191 |
+----
+
+NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to insertion order.
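+
+SQL statements can also take parameters. As a small sketch that builds on the `cities` mapping above (run it inside an `async` function, like the example), you could filter by population:
+
+[source,javascript]
+----
+// Query with a ? placeholder; parameters are passed as an array.
+const result = await client.getSql().execute(
+    'SELECT city FROM cities WHERE population > ?',
+    [5000000]
+);
+
+// Without the returnRawResult option, each row is a plain object keyed by column name.
+for await (const row of result) {
+    process.stdout.write(`${row.city}\n`);
+}
+----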
+
+== Understanding the Hazelcast Map API
+
+A Hazelcast Map is a distributed key-value store, similar to the standard JavaScript `Map`. You can store key-value pairs in a Hazelcast Map.
+
+In the following example, we work with map entries where the keys are IDs and the values are strings representing city names.
+
+[source,javascript]
+----
+'use strict';
+
+const { Client } = require('hazelcast-client');
+const fs = require('fs');
+const path = require('path');
+const process = require('process');
+
+///////////////////////////////////////////////////////
+
+(async () => {
+
+    const client = await Client.newHazelcastClient({
+        clusterName: '',
+
+        // Connection details for cluster
+        network: {
+            hazelcastCloud: {
+                discoveryToken: '',
+            },
+
+            ssl: {
+                enabled: true,
+                sslOptions: {
+                    ca: [fs.readFileSync(path.resolve(path.join(__dirname, 'ca.pem')))],
+                    cert: [fs.readFileSync(path.resolve(path.join(__dirname, 'cert.pem')))],
+                    key: [fs.readFileSync(path.resolve(path.join(__dirname, 'key.pem')))],
+                    passphrase: '',
+                    checkServerIdentity: () => null
+                },
+            },
+        },
+
+        // Other environment properties
+        properties: {
+            'hazelcast.logging.level': 'WARN' // this property value is case-insensitive
+        },
+    });
+
+    // Create a map on the cluster
+    const citiesMap = await client.getMap('cities');
+
+    // Clear the map
+    await citiesMap.clear();
+
+    // Add some data
+    await citiesMap.put(1, 'London');
+    await citiesMap.put(2, 'New York');
+    await citiesMap.put(3, 'Tokyo');
+
+    // Output the data
+    const entries = await citiesMap.entrySet();
+
+    for (const [key, value] of entries) {
+        process.stdout.write(`${key} -> ${value}\n`);
+    }
+
+    await client.shutdown();
+
+})().catch(err => {
+    process.stderr.write(`An error occurred: ${err}\n`);
+});
+----
+
+The following line returns a map proxy object for the `cities` map:
+
+[source,javascript]
+----
+const citiesMap = await client.getMap('cities');
+----
+
+If `cities` doesn't exist, it will be created automatically. All the clients connected to the same cluster have access to the same map.
+
+With these lines, the client adds data to the `cities` map. The first parameter is the key of the entry; the second is the value.
+
+[source,javascript]
+----
+await citiesMap.put(1, 'London');
+await citiesMap.put(2, 'New York');
+await citiesMap.put(3, 'Tokyo');
+----
+
+Then, we get the data using the `entrySet()` method and iterate over the results.
+
+[source,javascript]
+----
+const entries = await citiesMap.entrySet();
+
+for (const [key, value] of entries) {
+    process.stdout.write(`${key} -> ${value}\n`);
+}
+----
+
+Finally, `client.shutdown()` terminates our client and releases its resources.
+
+The output of this code is given below:
+
+[source,bash]
+----
+2 -> New York
+1 -> London
+3 -> Tokyo
+----
+
+NOTE: Ordering of the keys is NOT enforced and the results may NOT correspond to insertion order.
+
+== Adding a Listener to the Map
+
+You can add an entry listener using the `addEntryListener()` method available on the map proxy object.
+This allows you to listen to certain events that happen in the map across the cluster.
+
+The first argument to the `addEntryListener()` method is an object that defines the listener callbacks.
+In this example, we register listeners for the `added`, `removed` and `updated` events.
+
+The second argument is an optional key, used to listen for events of a single entry only; we pass `undefined` to receive events for all keys.
+The third argument, `includeValue`, is a boolean parameter that, if set to `true`, ensures the entry event contains the entry value.
+
+This enables your code to listen to map events of that particular map.
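+
+As a minimal sketch (assuming an existing `citiesMap` proxy), a registration looks like this; note that `addEntryListener()` resolves to a registration ID:
+
+[source,javascript]
+----
+// Listen for 'added' events on every key, including the entry value.
+const listenerId = await citiesMap.addEntryListener({
+    added: (event) => {
+        process.stdout.write(`added: ${event.key} -> ${event.value}\n`);
+    },
+}, undefined, true); // undefined = no key filter, true = includeValue
+----
+
+The complete example below registers callbacks for all three events.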
+
+[source,javascript]
+----
+'use strict';
+
+const { Client } = require('hazelcast-client');
+const fs = require('fs');
+const path = require('path');
+const process = require('process');
+
+///////////////////////////////////////////////////////
+
+(async () => {
+
+    const client = await Client.newHazelcastClient({
+        clusterName: '',
+
+        // Connection details for cluster
+        network: {
+            hazelcastCloud: {
+                discoveryToken: '',
+            },
+
+            ssl: {
+                enabled: true,
+                sslOptions: {
+                    ca: [fs.readFileSync(path.resolve(path.join(__dirname, 'ca.pem')))],
+                    cert: [fs.readFileSync(path.resolve(path.join(__dirname, 'cert.pem')))],
+                    key: [fs.readFileSync(path.resolve(path.join(__dirname, 'key.pem')))],
+                    passphrase: '',
+                    checkServerIdentity: () => null
+                },
+            },
+        },
+
+        // Other environment properties
+        properties: {
+            'hazelcast.logging.level': 'WARN' // this property value is case-insensitive
+        },
+    });
+
+    // Create a map on the cluster
+    const citiesMap = await client.getMap('cities');
+
+    // Register the listener before mutating the map so no events are missed
+    await citiesMap.addEntryListener({
+        added: (event) => {
+            process.stdout.write(`Entry added with key: ${event.key}, value: ${event.value}\n`);
+        },
+        removed: (event) => {
+            process.stdout.write(`Entry removed with key: ${event.key}\n`);
+        },
+        updated: (event) => {
+            process.stdout.write(`Entry updated with key: ${event.key}, old value: ${event.oldValue}, new value: ${event.value}\n`);
+        },
+    }, undefined, true);
+
+    // Clear the map
+    await citiesMap.clear();
+
+    // Add some data
+    await citiesMap.put(1, 'London');
+    await citiesMap.put(2, 'New York');
+    await citiesMap.put(3, 'Tokyo');
+
+    await citiesMap.remove(1);
+    await citiesMap.replace(2, 'Paris');
+
+    // Output the data
+    const entries = await citiesMap.entrySet();
+
+    for (const [key, value] of entries) {
+        process.stdout.write(`${key} -> ${value}\n`);
+    }
+
+    await client.shutdown();
+
+})().catch(err => {
+    process.stderr.write(`An error occurred: ${err}\n`);
+});
+----
+
+First, the map is cleared, which triggers `removed` events if the map already contains entries. Then, entries are added, and they are logged. After that, we remove one of the entries and update another one. Finally, we log the entries again.
+
+The output is as follows.
+
+[source,bash]
+----
+Entry added with key: 1, value: London
+Entry added with key: 2, value: New York
+Entry added with key: 3, value: Tokyo
+Entry removed with key: 1
+Entry updated with key: 2, old value: New York, new value: Paris
+2 -> Paris
+3 -> Tokyo
+----
+
+The `removed` event does not log a value because the value of the first entry becomes `null` once it is removed.
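+
+When you no longer need the events, you can deregister with the ID returned at registration time. A small sketch, reusing the `listenerId` from the earlier registration example:
+
+[source,javascript]
+----
+// Stops delivery of entry events to this listener.
+await citiesMap.removeEntryListener(listenerId);
+----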
+
+== Summary
+
+In this tutorial, you learned how to get started with the Hazelcast Node.js Client, connect to a Viridian instance and put data into a distributed map.
+
+== See Also
+
+There is a lot more you can do with the Node.js Client. To learn more, such as how to query a map with predicates and SQL,
+check out our https://github.com/hazelcast/hazelcast-nodejs-client[Node.js Client repository] and our http://hazelcast.github.io/hazelcast-nodejs-client/[Node.js API documentation] to better understand what is possible.
+
+If you have any questions, suggestions, or feedback, please do not hesitate to reach out to us via https://slack.hazelcast.com/[Hazelcast Community Slack].
+Also, please take a look at https://github.com/hazelcast/hazelcast-nodejs-client/issues[the issue list] if you would like to contribute to the client.
\ No newline at end of file
diff --git a/docs/modules/tutorials/pages/python-client-getting-started.adoc b/docs/modules/tutorials/pages/python-client-getting-started.adoc
new file mode 100644
index 000000000..cd5afe18d
--- /dev/null
+++ b/docs/modules/tutorials/pages/python-client-getting-started.adoc
@@ -0,0 +1,419 @@
+= Getting Started with the Hazelcast Python Client
+:description: This tutorial will get you started with the Hazelcast Python client and show you how to manipulate a map.
+
+== What You'll Learn
+
+{description}
+
+== Before you Begin
+
+* Python 3.6 or above
+* https://hazelcast.com/products/viridian/[Hazelcast Viridian Cloud Account]
+* A text editor or IDE
+
+== Start a Hazelcast Viridian Cloud Cluster
+
+1. Sign up for a Hazelcast Viridian Cloud account (free trial is available).
+2. Log in to your Hazelcast Viridian Cloud account and start your trial by filling in the welcome questionnaire.
+3. A Viridian cluster will be created automatically when you start your trial.
+4. Open the Connect Cluster dialog and switch to the Advanced setup tab for the connection information you need below.
+5. From the Advanced setup tab, download the keystore files and take note of your Cluster ID, Discovery Token and Password, as you will need them later.
+
+== Set up a Hazelcast Client
+
+Create a new folder and navigate to it:
+
+[source]
+----
+mkdir hazelcast-python-example
+cd hazelcast-python-example
+----
+
+Download the Hazelcast Python Client library using pip:
+
+[source]
+----
+python -m pip install hazelcast-python-client
+----
+
+Extract the keystore files you downloaded from Viridian into this directory. The files you need for this tutorial are:
+
+[source,bash]
+----
+ca.pem
+cert.pem
+key.pem
+----
+
+== Understanding the Python Client
+
+The following code creates and starts a Hazelcast client with the default configuration, connects it to your Viridian cluster, and then shuts the client down.
+
+Create a Python file named “example.py” and put the following code inside it:
+
+[source,python]
+----
+import hazelcast
+import os
+
+####################################
+
+# Connect to your Hazelcast Cluster
+client = hazelcast.HazelcastClient(
+    # Viridian Cluster Name and Token
+    cluster_name="",
+    cloud_discovery_token="",
+
+    # configure SSL
+    ssl_enabled=True,
+    ssl_cafile=os.path.abspath("ca.pem"),
+    ssl_certfile=os.path.abspath("cert.pem"),
+    ssl_keyfile=os.path.abspath("key.pem"),
+    ssl_password="",
+)
+
+# take actions
+print("Welcome to your Hazelcast Viridian Cluster!")
+
+# Shutdown the client connection
+client.shutdown()
+----
+
+To learn how to configure and use the client in more detail, review the https://hazelcast.readthedocs.io/en/stable/client.html#hazelcast.client.HazelcastClient[Python API documentation].
+
+== Understanding the Hazelcast SQL API
+
+The Hazelcast SQL API is a Calcite-based SQL interface that lets you interact with Hazelcast much like any other datastore.
+
+In the following example, we create a map and insert entries where the keys are IDs and the values are objects representing a city.
+
+[source,python]
+----
+import hazelcast
+from hazelcast import HazelcastClient
+from hazelcast.serialization.api import CompactReader, CompactSerializer, CompactWriter
+import os
+import typing
+
+class City:
+    def __init__(self, country: str, city: str, population: int) -> None:
+        self.country = country
+        self.city = city
+        self.population = population
+
+class CitySerializer(CompactSerializer[City]):
+    def read(self, reader: CompactReader) -> City:
+        city = reader.read_string("city")
+        country = reader.read_string("country")
+        population = reader.read_int32("population")
+        return City(country, city, population)
+
+    def write(self, writer: CompactWriter, obj: City) -> None:
+        writer.write_string("country", obj.country)
+        writer.write_string("city", obj.city)
+        writer.write_int32("population", obj.population)
+
+    def get_type_name(self) -> str:
+        return "city"
+
+    def get_class(self) -> typing.Type[City]:
+        return City
+
+def create_mapping(client: HazelcastClient) -> None:
+    print("Creating the mapping...", end="")
+    # See: https://docs.hazelcast.com/hazelcast/latest/sql/mapping-to-maps
+    mapping_query = """
+        CREATE OR REPLACE MAPPING
+        cities (
+            __key INT,
+            country VARCHAR,
+            city VARCHAR,
+            population INT) TYPE IMAP
+        OPTIONS (
+            'keyFormat' = 'int',
+            'valueFormat' = 'compact',
+            'valueCompactTypeName' = 'city')
+    """
+    client.sql.execute(mapping_query).result()
+    print("OK.")
+
+def populate_cities(client: HazelcastClient) -> None:
+    print("Inserting data...", end="")
+
+    insert_query = """
+        INSERT INTO cities
+        (__key, city, country, population) VALUES
+        (1, 'London', 'United Kingdom', 9540576),
+        (2, 'Manchester', 'United Kingdom', 2770434),
+        (3, 'New York', 'United States', 19223191),
+        (4, 'Los Angeles', 'United States', 3985520),
+        (5, 'Istanbul', 'Türkiye', 15636243),
+        (6, 'Ankara', 'Türkiye', 5309690),
+        (7, 'Sao Paulo ', 'Brazil', 22429800)
+    """
+
+    try:
+        client.sql.execute("DELETE FROM cities").result()
+        client.sql.execute(insert_query).result()
+        print("OK.")
+    except Exception as e:
+        print(f"FAILED: {e!s}.")
+
+def fetch_cities(client: HazelcastClient) -> None:
+    print("Fetching cities...", end="")
+    result = client.sql.execute("SELECT __key, this FROM cities").result()
+    print("OK.")
+
+    print("--Results of 'SELECT __key, this FROM cities'")
+    print(f"| {'id':>4} | {'country':<20} | {'city':<20} | {'population':<15} |")
+
+    for row in result:
+        city = row["this"]
+        print(
+            f"| {row['__key']:>4} | {city.country:<20} | {city.city:<20} | {city.population:<15} |"
+        )
+
+####################################
+
+# Connect to your Hazelcast Cluster
+client = hazelcast.HazelcastClient(
+    # Viridian Cluster Name and Token
+    cluster_name="",
+    cloud_discovery_token="",
+
+    # configure SSL
+    ssl_enabled=True,
+    ssl_cafile=os.path.abspath("ca.pem"),
+    ssl_certfile=os.path.abspath("cert.pem"),
+    ssl_keyfile=os.path.abspath("key.pem"),
+    ssl_password="",
+
+    # Register Compact serializer of City class
+    compact_serializers=[CitySerializer()],
+)
+
+# Create a map on the cluster
+create_mapping(client)
+
+# Add some data
+populate_cities(client)
+
+# Output the data
+fetch_cities(client)
+
+# Shutdown the client connection
+client.shutdown()
+----
+
+The output of this code is given below:
+
+[source,bash]
+----
+Creating the mapping...OK.
+Inserting data...OK.
+Fetching cities...OK.
+--Results of 'SELECT __key, this FROM cities'
+| id | country | city | population |
+| 2 | United Kingdom | Manchester | 2770434 |
+| 6 | Türkiye | Ankara | 5309690 |
+| 1 | United Kingdom | London | 9540576 |
+| 7 | Brazil | Sao Paulo | 22429800 |
+| 4 | United States | Los Angeles | 3985520 |
+| 5 | Türkiye | Istanbul | 15636243 |
+| 3 | United States | New York | 19223191 |
+----
+
+NOTE: Ordering of the keys is NOT enforced and results may NOT correspond to insertion order.
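+
+SQL statements can also take parameters. As a small sketch that builds on the `cities` mapping above (and the same connected `client`), you could filter by population; each parameter is passed as an extra argument to `execute()`:
+
+[source,python]
+----
+# Query with a ? placeholder; parameters follow the statement.
+result = client.sql.execute(
+    "SELECT city FROM cities WHERE population > ?", 5000000
+).result()
+
+for row in result:
+    print(row["city"])
+----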
+
+== Understanding the Hazelcast Map API
+
+A Hazelcast Map is a distributed key-value store, similar to a Python dictionary. You can store key-value pairs in a Hazelcast Map.
+
+In the following example, we work with map entries where the keys are IDs and the values are strings representing city names.
+
+[source,python]
+----
+import hazelcast
+import os
+
+####################################
+
+# Connect to your Hazelcast Cluster
+client = hazelcast.HazelcastClient(
+    # Viridian Cluster Name and Token
+    cluster_name="",
+    cloud_discovery_token="",
+
+    # configure SSL
+    ssl_enabled=True,
+    ssl_cafile=os.path.abspath("ca.pem"),
+    ssl_certfile=os.path.abspath("cert.pem"),
+    ssl_keyfile=os.path.abspath("key.pem"),
+    ssl_password="",
+)
+
+# Create a map on the cluster
+cities_map = client.get_map('cities').blocking()
+
+# Clear the map
+cities_map.clear()
+
+# Add some data
+cities_map.put(1, "London")
+cities_map.put(2, "New York")
+cities_map.put(3, "Tokyo")
+
+# Output the data
+entries = cities_map.entry_set()
+
+for key, value in entries:
+    print(f"{key} -> {value}")
+
+# Shutdown the client connection
+client.shutdown()
+----
+
+The following line returns a map proxy object for the `cities` map:
+
+[source,python]
+----
+cities_map = client.get_map('cities').blocking()
+----
+
+If `cities` doesn't exist, it will be created automatically. All the clients connected to the same cluster have access to the same map.
+
+You may wonder why we call `blocking()` on the proxy returned by `get_map()`. It returns a version of the proxy with only blocking
+(synchronous) method calls, which is better for getting started. For asynchronous calls, please check our https://hazelcast.readthedocs.io/en/stable/#usage[API documentation].
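+
+For illustration, a minimal sketch of the non-blocking style (assuming the same connected `client`; the key and value here are only examples): without `blocking()`, each map operation returns a `Future`:
+
+[source,python]
+----
+# Without blocking(), map operations return Futures.
+cities_map_async = client.get_map("cities")
+
+future = cities_map_async.put(4, "Madrid")  # returns immediately
+future.result()                             # block only when you need the outcome
+----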
+
+With these lines, the client adds data to the `cities` map. The first parameter is the key of the entry; the second is the value.
+
+[source,python]
+----
+cities_map.put(1, "London")
+cities_map.put(2, "New York")
+cities_map.put(3, "Tokyo")
+----
+
+Then, we get the data using the `entry_set()` method and iterate over the results.
+
+[source,python]
+----
+entries = cities_map.entry_set()
+
+for key, value in entries:
+    print(f"{key} -> {value}")
+----
+
+Finally, `client.shutdown()` terminates our client and releases its resources.
+
+The output of this code is given below:
+
+[source,bash]
+----
+2 -> New York
+1 -> London
+3 -> Tokyo
+----
+
+NOTE: Ordering of the keys is NOT enforced and the results may NOT correspond to insertion order.
+
+== Adding a Listener to the Map
+
+You can add an entry listener using the `add_entry_listener()` method available on the map proxy object.
+This allows you to listen to certain events that happen in the map across the cluster.
+
+The `add_entry_listener()` method takes an `include_value` boolean parameter that, if set to `True`, ensures the entry event contains the entry value.
+The listener callbacks themselves are passed as keyword arguments such as `added_func`, `removed_func` and `updated_func`.
+In this example, we register listeners for the `added`, `removed` and `updated` events.
+
+This enables your code to listen to map events of that particular map.
+
+[source,python]
+----
+import hazelcast
+import os
+
+def entry_added(event):
+    print(f"Entry added with key: {event.key}, value: {event.value}")
+
+def entry_removed(event):
+    print(f"Entry removed with key: {event.key}")
+
+def entry_updated(event):
+    print(f"Entry updated with key: {event.key}, old value: {event.old_value}, new value: {event.value}")
+
+####################################
+
+# Connect to your Hazelcast Cluster
+client = hazelcast.HazelcastClient(
+    # Viridian Cluster Name and Token
+    cluster_name="",
+    cloud_discovery_token="",
+
+    # configure SSL
+    ssl_enabled=True,
+    ssl_cafile=os.path.abspath("ca.pem"),
+    ssl_certfile=os.path.abspath("cert.pem"),
+    ssl_keyfile=os.path.abspath("key.pem"),
+    ssl_password="",
+)
+
+# Create a map on the cluster
+cities_map = client.get_map('cities').blocking()
+
+# Add listeners
+cities_map.add_entry_listener(
+    include_value=True, added_func=entry_added, removed_func=entry_removed, updated_func=entry_updated
+)
+
+# Clear the map
+cities_map.clear()
+
+# Add some data
+cities_map.set(1, "London")
+cities_map.set(2, "New York")
+cities_map.set(3, "Tokyo")
+
+cities_map.remove(1)
+cities_map.replace(2, "Paris")
+
+# Output the data
+entries = cities_map.entry_set()
+
+for key, value in entries:
+    print(f"{key} -> {value}")
+
+# Shutdown the client connection
+client.shutdown()
+----
+
+First, the map is cleared, which triggers `removed` events if the map already contains entries. Then, entries are added, and they are logged. After that, we remove one of the entries and update another one. Finally, we log the entries again.
+
+The output is as follows.
+
+[source,bash]
+----
+Entry added with key: 1, value: London
+Entry added with key: 2, value: New York
+Entry added with key: 3, value: Tokyo
+Entry removed with key: 1
+Entry updated with key: 2, old value: New York, new value: Paris
+2 -> Paris
+3 -> Tokyo
+----
+
+The `removed` event does not log a value because the value of the first entry becomes `None` once it is removed.
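+
+`add_entry_listener()` also returns a registration ID, so you can stop listening later. A small sketch, reusing the `cities_map` proxy and the `entry_added` callback from the example above:
+
+[source,python]
+----
+# Keep the registration ID to remove the listener when it's no longer needed.
+registration_id = cities_map.add_entry_listener(include_value=True, added_func=entry_added)
+cities_map.remove_entry_listener(registration_id)
+----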
+
+== Summary
+
+In this tutorial, you learned how to get started with the Hazelcast Python Client, connect to a Viridian instance and put data into a distributed map.
+
+== See Also
+
+There is a lot more you can do with the Python Client. To learn more, such as how to query a map with predicates and SQL,
+check out our https://github.com/hazelcast/hazelcast-python-client[Python Client repository] and our https://hazelcast.readthedocs.io/en/stable/client.html#hazelcast.client.HazelcastClient[Python API documentation] to better understand what is possible.
+
+If you have any questions, suggestions, or feedback, please do not hesitate to reach out to us via https://slack.hazelcast.com/[Hazelcast Community Slack].
+Also, please take a look at https://github.com/hazelcast/hazelcast-python-client/issues[the issue list] if you would like to contribute to the client.
diff --git a/docs/modules/tutorials/pages/stream-from-kafka-kerberos.adoc b/docs/modules/tutorials/pages/stream-from-kafka-kerberos.adoc
new file mode 100644
index 000000000..b85e36d81
--- /dev/null
+++ b/docs/modules/tutorials/pages/stream-from-kafka-kerberos.adoc
@@ -0,0 +1,158 @@
+= Connect Hazelcast to Kafka Clusters Secured with Kerberos
+:description: Learn how to connect Hazelcast Jet pipelines to Kafka clusters that are secured with Kerberos authentication.
+
+{description}
+
+[.interactive-button]
+link:https://gitpod.io/#https://github.com/hazelcast-guides/kafka-kerberos[Try it in your browser,window=_blank]
+
+== Context
+
+When Kafka brokers are secured with Kerberos authentication, your Hazelcast cluster must acquire session keys from the Kerberos server before it can communicate with the Kafka brokers.
+
+In this example, you'll learn how to configure the Hazelcast Kafka connector to connect to a Kafka broker that's secured with Kerberos authentication.
+
+== Before you Begin
+
+Before starting this tutorial, make sure that you have the following prerequisites:
+
+* link:https://docs.docker.com/compose/install/[Docker Compose]
+* link:https://git-scm.com/book/en/v2/Getting-Started-Installing-Git[Git]
+* xref:hazelcast:deploy:versioning-compatibility.adoc[JDK 8 - 11]
+* link:https://maven.apache.org/download.cgi[Maven]
+* Knowledge of link:https://en.wikipedia.org/wiki/Kerberos_(protocol)[Kerberos]
+* Knowledge of link:https://en.wikipedia.org/wiki/Apache_Kafka[Kafka]
+
+== Step 1. Clone the Project
+
+To set up the project, you need to download the code from GitHub.
+
+Clone the GitHub repository.
+
+[tabs]
+====
+HTTPS::
++
+--
+```bash
+git clone https://github.com/hazelcast-guides/kafka-kerberos.git
+cd kafka-kerberos
+```
+--
+SSH::
++
+--
+```bash
+git clone git@github.com:hazelcast-guides/kafka-kerberos.git
+cd kafka-kerberos
+```
+--
+====
+
+== Step 2. Start the Docker Containers
+
+In this step, you'll use Docker Compose to start all the Docker containers, including a Kafka broker, Kerberos server, Hazelcast Platform, and Management Center.
+
+```bash
+docker compose up -d
+```
+
+The Docker containers are now running in detached mode. You can check that they are running using the following command:
+
+```
+docker ps
+```
+
+To see the logs of your Hazelcast member, use the following command:
+
+```
+docker logs hazelcast
+```
+
+You should see that you have a single member running in the cluster.
+
+```
+Members {size:1, ver:1} [
+	Member [172.19.0.4]:5701 - 15116025-342b-43c0-83f7-a2a90f0281ce this
+]
+```
+
+== Step 3. Create a Kafka Topic
+
+To create a Kafka topic, you'll use the `kafka-console-producer` script that's built into the Kafka broker.
+
+Create the `orders` topic and add some records to it.
+
+```bash
+docker exec -i broker kafka-console-producer --broker-list broker:9092 --topic orders < orders.jsonl --producer.config /etc/kafka/kafka-client.properties
+```
+
+The `kafka-client.properties` file contains the Kafka client configuration that allows Kerberos to authenticate Kafka clients. The Kafka clients assume the `jduke@KERBEROS.EXAMPLE` SPN (service principal name), which is registered on the Kerberos server.
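+
+For reference, a Kerberos-enabled Kafka client configuration typically looks like the following sketch. The exact contents of the repository's `kafka-client.properties` may differ; the keytab path and service name here are illustrative:
+
+```properties
+security.protocol=SASL_PLAINTEXT
+sasl.mechanism=GSSAPI
+sasl.kerberos.service.name=kafka
+sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required \
+  useKeyTab=true \
+  keyTab="/etc/security/keytabs/jduke.keytab" \
+  principal="jduke@KERBEROS.EXAMPLE";
+```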
+
+== Step 4. Connect Hazelcast to the Kafka Broker
+
+Now that your Kafka topic has some records, you can configure the Kafka connector in Hazelcast to consume those records. In this step, you'll create a Jet job that reads from the Kafka topic and writes the data to a Hazelcast map called `sink_orders`. This Kafka connector is configured to assume the same SPN as the producer in the previous step.
+
+. Change into the `jet-pipeline` directory.
+
+. Package the Java file into a JAR.
++
+```bash
+mvn package
+```
+
+. Submit the JAR to your Hazelcast member. Replace the `$PATH_TO_PROJECT` placeholder with the absolute path to the `kafka-kerberos` directory.
++
+```bash
+docker run -it --network kafka-kerberos_default -v $PATH_TO_PROJECT/jet-pipeline/target:/jars --rm hazelcast/hazelcast:5.1.4 hz-cli -t hazelcast:5701 submit -c com.example.hazelcast.jet.kafka.KafkaSourceWithClientServerHazelcast /jars/jet-kafka-1.0.0.jar
+```
+
+You should see the following in the console:
+
+```
+Submitting JAR '/jars/jet-kafka-1.0.0.jar' with arguments []
+Orders added to map
+===================
+```
+
+Your Hazelcast cluster connects to the Kerberos server, acquires a session key, and reads records from the `orders` topic. Your Hazelcast cluster now contains a map called `sink_orders` that holds the orders.
+
+== Step 5. Verify that the Connection Succeeded
+
+In this step, you'll verify that the Kerberos server authenticated Hazelcast and that the Kafka broker allowed the Hazelcast Kafka connector to read from the `orders` topic.
+
+. Go to `localhost:8080` and enable dev mode in Management Center.
+
+. Open the SQL Browser.
+
+. In the *select a map* dropdown, select *sink_orders (No SQL Mapping)* to auto-generate the `CREATE MAPPING` command.
+
+. Click *Execute Query*.
+
+. Delete the previous command from the SQL editor and enter the following:
++
+```sql
+SELECT * FROM sink_orders;
+```
+
+. Click *Execute Query*.
+
+The `sink_orders` map contains all the records in the `orders` topic.
+
+== Related Resources
+
+Kafka and Kerberos:
+
+- For details about how to configure Kafka clients with Kerberos, see the link:https://docs.confluent.io/platform/current/kafka/authentication_sasl/authentication_sasl_gssapi.html#clients[Confluent documentation].
+
+- link:https://docs.confluent.io/platform/current/kafka/authentication_sasl/authentication_sasl_gssapi.html#configuring-gssapi[Configuring GSSAPI]
+
+- link:https://developer.confluent.io/learn-kafka/security/authentication-ssl-and-sasl-ssl/[Kafka security course]
+
+Hazelcast:
+
+- xref:hazelcast:sql:sql-overview.adoc[]
+- xref:hazelcast:sql:mapping-to-kafka.adoc[]
+- xref:hazelcast:sql:mapping-to-maps.adoc[]
+- xref:hazelcast:sql:querying-streams.adoc[]
+
diff --git a/docs/modules/tutorials/pages/tutorials-index.adoc b/docs/modules/tutorials/pages/tutorials-index.adoc
new file mode 100644
index 000000000..fa953a59a
--- /dev/null
+++ b/docs/modules/tutorials/pages/tutorials-index.adoc
@@ -0,0 +1,17 @@
+= Tutorials
+
+
+
+== Hazelcast Platform
+* xref:tutorials:kubernetes.adoc[]
+* xref:tutorials:java-client-getting-started.adoc[]
+* xref:tutorials:csharp-client-getting-started.adoc[]
+* xref:tutorials:go-client-getting-started.adoc[]
+* xref:tutorials:nodejs-client-getting-started.adoc[]
+* xref:tutorials:python-client-getting-started.adoc[]
+* xref:tutorials:cpsubsystem.adoc[]
+* xref:tutorials:join-two-streams.adoc[]
+* xref:tutorials:stream-from-kafka-kerberos.adoc[]
+
+== Hazelcast Cloud
+* xref:tutorials:hazelcast-platform-operator-expose-externally.adoc[]
\ No newline at end of file
diff --git a/docs/modules/wan/pages/failures.adoc b/docs/modules/wan/pages/failures.adoc
index 6a498ced9..f7b8f1e7b 100644
--- a/docs/modules/wan/pages/failures.adoc
+++ b/docs/modules/wan/pages/failures.adoc
@@ -22,7 +22,7 @@ the partition, so that the same partitions are always sent to the same target me
 
 == WAN Failure Detection
 
-If using the Hazelcast {enterprise-product-name} edition class `WanBatchReplication`
+If using the Hazelcast {enterprise-product-name} class `WanBatchReplication`
 (see the xref:defining-wan-replication.adoc[Defining WAN replication section]),
 the WAN replication catches any exceptions when sending
the WAN events to the endpoint. In the case of an exception, the endpoint is removed from the endpoint list to which WAN replicates and the WAN events are resent to