Use SO_LINGER with timeout 0 to avoid thousands of server sockets in TIME_WAIT when using small reconnect intervals (#260)

* Use SO_LINGER with timeout 0 to avoid thousands of server sockets in TIME_WAIT when using small reconnect intervals
* Added test for small reconnect interval (1 command)
* CI adjustments for tcp connection limit and reuse
* Skipping reconnect interval test on cluster given it's not supported
RedisLabs · Jul 17, 2024 · cf45f2c · cf45f2c
1 parent c7d2b57
commit cf45f2c
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 6 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -46,6 +46,12 @@ jobs:
         sudo apt-get install redis
         sudo service redis-server stop
 
+    - name: Increase connection limit
+      run: |
+        sudo sysctl -w net.ipv4.tcp_fin_timeout=10
+        sudo sysctl -w net.ipv4.tcp_tw_reuse=1
+        ulimit -n 40960
+
     - name: Generate TLS test certificates
       if: matrix.platform == 'ubuntu-latest'
       run: |

diff --git a/shard_connection.cpp b/shard_connection.cpp
@@ -231,12 +231,18 @@ int shard_connection::setup_socket(struct connect_info* addr) {
             return -1;
         }
 
-        // configure socket behavior
-        struct linger ling = {0, 0};
-        int flags = 1;
+
         int error = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, (void *) &flags, sizeof(flags));
         assert(error == 0);
 
+        /*
+        * Configure socket behavior:
+        * If l_onoff is non-zero and l_linger is zero:
+        *   The socket will discard any unsent data and the close() call will return immediately.
+        */
+        struct linger ling;
+        ling.l_onoff = 1;  // Enable SO_LINGER
+        ling.l_linger = 0; // Discard any unsent data and close immediately
         error = setsockopt(sockfd, SOL_SOCKET, SO_LINGER, (void *) &ling, sizeof(ling));
         assert(error == 0);
 
@@ -264,7 +270,7 @@ int shard_connection::connect(struct connect_info* addr) {
     // setup socket
     int sockfd = setup_socket(addr);
     if (sockfd < 0) {
-        fprintf(stderr, "Failed to setup socket: %s", strerror(errno));
+        fprintf(stderr, "Failed to setup socket: %s\n", strerror(errno));
         return -1;
     }
 
@@ -478,8 +484,10 @@ void shard_connection::process_response(void)
             // client manage connection & disconnection of shard
             m_conns_manager->disconnect();
             ret = m_conns_manager->connect();
-            assert(ret == 0);
-
+            if (ret != 0) {
+                benchmark_error_log("failed to reconnect.\n");
+                exit(1);
+            }
             return;
         }
     }

diff --git a/tests/tests_oss_simple_flow.py b/tests/tests_oss_simple_flow.py
@@ -350,6 +350,35 @@ def test_default_set_get_1_1(env):
     # assert same number of gets and sets
     env.assertEqual(merged_command_stats['cmdstat_set']['calls'], merged_command_stats['cmdstat_get']['calls'])
 
+# run each test on different env
+def test_short_reconnect_interval(env):
+    # cluster mode does not support reconnect-interval option
+    env.skipOnCluster()
+    benchmark_specs = {"name": env.testName, "args": ['--reconnect-interval=1']}
+    addTLSArgs(benchmark_specs, env)
+    config = get_default_memtier_config()
+    master_nodes_list = env.getMasterNodesList()
+    overall_expected_request_count = get_expected_request_count(config)
+
+    add_required_env_arguments(benchmark_specs, config, env, master_nodes_list)
+
+    # Create a temporary directory
+    test_dir = tempfile.mkdtemp()
+
+    config = RunConfig(test_dir, env.testName, config, {})
+    ensure_clean_benchmark_folder(config.results_dir)
+
+    benchmark = Benchmark.from_json(config, benchmark_specs)
+
+    # benchmark.run() returns True if the return code of memtier_benchmark was 0
+    memtier_ok = benchmark.run()
+
+    master_nodes_connections = env.getOSSMasterNodesConnectionList()
+    merged_command_stats = {'cmdstat_set': {'calls': 0}, 'cmdstat_get': {'calls': 0}}
+    overall_request_count = agg_info_commandstats(master_nodes_connections, merged_command_stats)
+    assert_minimum_memtier_outcomes(config, env, memtier_ok, overall_expected_request_count, overall_request_count)
+
+
 # run each test on different env
 def test_default_set_get_3_runs(env):
     run_count = 3