From 8be8ea877a3af51e979bfd46fcabb2dbabc12240 Mon Sep 17 00:00:00 2001
From: Wondertan <hlibwondertan@gmail.com>
Date: Sun, 3 Nov 2024 02:11:58 +0100
Subject: [PATCH] another bitswap param revision

---
 share/shwap/p2p/bitswap/bitswap.go | 125 +++++++++++++++++------------
 1 file changed, 73 insertions(+), 52 deletions(-)

diff --git a/share/shwap/p2p/bitswap/bitswap.go b/share/shwap/p2p/bitswap/bitswap.go
index 840978b81e..64c7cfcc9f 100644
--- a/share/shwap/p2p/bitswap/bitswap.go
+++ b/share/shwap/p2p/bitswap/bitswap.go
@@ -17,71 +17,89 @@ import (
 	"github.com/libp2p/go-libp2p/core/protocol"
 )
 
-var dontHaveTimeoutConfig = &client.DontHaveTimeoutConfig{
-	DontHaveTimeout:            5 * time.Second,
-	MaxExpectedWantProcessTime: 7 * time.Second,
-	MaxTimeout:                 30 * time.Second,
-	PingLatencyMultiplier:      3,
-	MessageLatencyAlpha:        0.2,
-	MessageLatencyMultiplier:   4,
-}
+// THESE VALUES HAVE TO BE REVISED ON EVERY SINGLE MAX BLOCK SIZE BUMP
+// CURRENT BLOCK SIZE TARGET: 8MB (256 EDS)
 
 // Client constants
 const (
-	// simulateDontHaves emulates DONT_HAVE message from a peer after 5 second timeout.
-	// This necessary as we configure servers to not send DONT_HAVEs.
+	// simulateDontHaves emulates DONT_HAVE message from a peer after dynamically estimated timeout.
 	// Simulating protects from malicious peers and ensure Bitswap tries new peers if originally
-	// selected one is slow.
-	// TODO(@Wondertan): PR to bitswap to make this timeout configurable
-	//  Higher timeout increases the probability of successful reconstruction,
-	//  as peers from who we get >=64 DONT_HAVEs are kicked from Bitswap session.
+	// selected one is slow or not responding.
+	// Dynamic timeout, in our case, can be treated as optimization allowing to move on a next peer faster.
+	// See simulateDontHaveConfig for more details.
 	simulateDontHaves = true
-	// providerSearchDelay defines the initial delay before Bitswap client starts aggressive
-	// broadcasting of WANTs to all the peers. We offset this for longer than the default to minimize
+	// broadcastDelay defines the initial delay before Bitswap client starts aggressive
+	// broadcasting of live WANTs to all the peers. We offset this for longer than the default to minimize
 	// unnecessary broadcasting as in most cases we already have peers connected with needed data on
 	// a new request.
-	providerSearchDelay = time.Second * 10
-	// rebroadcastDelay is similar to the providerSearchDelay, but it targets DHT/ContentRouting
+	broadcastDelay = time.Second * 10
+	// disablePerPeerRetries disables rebroadcasting of WANTs with no response in peer message queue.
+	// We keep it enabled to account for case where maxServerWantListsPerPeer gets exceeded, loosing WANTs.
+	disablePerPeerRetries = false
+	// provSearchDelay is similar to the broadcastDelay, but it targets DHT/ContentRouting
 	// peer discovery and a gentle broadcast of a single random live WANT to all connected peers.
-	// Considering no DHT usage and broadcasting configured by providerSearchDelay, we set
-	// rebroadcastDelay to max value, effectively disabling it
-	rebroadcastDelay = 1<<63 - 1
+	// Considering no DHT usage and broadcasting configured by broadcastDelay, we set
+	// provSearchDelay to max value, effectively disabling it
+	provSearchDelay = 1<<63 - 1
 )
 
 // Server constants
 const (
-	// providesEnabled dictates Bitswap Server not to provide content to DHT/ContentRouting as we don't use it
-	providesEnabled = false
-	// sendDontHaves prevents Bitswap Server from sending DONT_HAVEs while keeping peers on hold instead:
-	//  * Clients simulate DONT_HAVEs after timeout anyway
-	//  * Servers may not have data immediately and this gives an opportunity to subscribe
-	//  * This is necessary for reconstruction. See https://github.com/celestiaorg/celestia-node/issues/732
+	// sendDontHaves dictates Bitswap Server to send DONT_HAVE messages to peers when they requested block is not
+	// available locally. This is useful for clients to quickly move on to another peer.
+	// This breaks reconstruction, unless we make reconstruction case detectable on the Server side blocking Bitswap
+	// from serving DONT_HAVE messages in Blockstore, which would be the goal.
 	sendDontHaves = true
 	// maxServerWantListsPerPeer defines the limit for maximum possible cached wants/requests per peer
-	// in the Bitswap. Exceeding this limit will cause Bitswap server to drop requested wants leaving
-	// client stuck for sometime.
-	// Thus, we make the limit a bit generous, so we minimize the chances of this happening.
+	// in the Bitswap. Exceeding this limit will cause Bitswap server to drop requested WANTs leaving
+	// client stuck for some time.
 	// This is relevant until https://github.com/ipfs/boxo/pull/629#discussion_r1653362485 is fixed.
-	// TODO: This value has to be increased together with increasing blocksizes
-	maxServerWantListsPerPeer = 8096
-	// targetMessageSize defines how much data Bitswap will aim to pack within a single message, before
-	// splitting it up in multiple. Bitswap first looks up the size of the requested data across
-	// multiple requests and only after reads up the data in portions one-by-one targeting the
-	// targetMessageSize.
-	//
-	// Bigger number will speed transfers up if reading data from disk is fast. In our case, the
-	// Bitswap's size lookup via [Blockstore] will already cause underlying cache to keep the data,
-	// so reading up data is fast, and we can aim to pack as much as we can.
-	targetMessageSize = 1 << 20 // 1MB
-	// outstandingBytesPerPeer limits number of bytes queued for work for a peer across multiple requests.
-	// We set it to be equal to targetMessageSize * N, so there can max N messages being prepared for
-	// a peer at once.
-	outstandingBytesPerPeer = targetMessageSize * 4
+	// 8192 is 512 sampling requests of size 16.
+	maxServerWantListsPerPeer = 8192
+	// targetResponseSize defines soft-limit of how much data server packs into a response.
+	// More data means more compute and time spend while serving a single peer under load, and thus increasing serving
+	// latency for other peers.
+	// We set it to 65KiB, which fits a Row of 8MB block with additional metadata.
+	targetResponseSize = 65 << 10
+	// responseWorkersCount is the number of workers packing responses on the server.
+	// More workers mean more parallelism and faster responses, but also more memory usage.
+	responseWorkersCount = 32
+	// maxWorkPerPeer sets maximum concurrent processing work allowed for a peer.
+	// We set it to be equal to targetResponseSize * responseWorkersCount, so a single peer
+	// can utilize all workers. Note that when there are more peers, prioritization still ensures
+	// even spread across all peers.
+	maxWorkPerPeer = targetResponseSize * responseWorkersCount
 	// replaceHasWithBlockMaxSize configures Bitswap to use Has method instead of GetSize to check existence
 	// of a CID in Blockstore.
 	replaceHasWithBlockMaxSize = 0
+	// providesEnabled dictates Bitswap Server not to provide content to DHT/ContentRouting as we don't use it
+	providesEnabled = false
 )
 
+// simulateDontHaveConfig contains the configuration for Bitswap's DONT_HAVE simulation.
+// This relies on latency to determine when to simulate a DONT_HAVE from a peer.
+var simulateDontHaveConfig = &client.DontHaveTimeoutConfig{
+	// MaxTimeout is the limit cutoff time for the dynamic timeout estimation.
+	MaxTimeout: 30 * time.Second,
+	// MessageLatencyMultiplier is the multiplier for the message latency to account for errors
+	// THIS IS THE MOST IMPORTANT KNOB and is tricky to get right due to high variance in latency
+	// and particularly request processing time.
+	MessageLatencyMultiplier: 4,
+	// MessageLatencyAlpha is the alpha value for the exponential moving average of message latency.
+	// It's a 0-1 value. More prefers recent latencies, less prefers older measurements.
+	MessageLatencyAlpha: 0.2,
+
+	// Below are less important knobs and target initial timeouts for when there is no latency data yet
+	//
+	// DontHaveTimeout is default timeout used until ping latency is measured
+	DontHaveTimeout: 10 * time.Second,
+	// time estimate for how long it takes to process a WANT/Block message
+	// used for ping timeout calculation
+	MaxExpectedWantProcessTime: 7 * time.Second,
+	// multiplier to account for errors with ping latency estimates
+	PingLatencyMultiplier: 3,
+}
+
 // NewNetwork constructs Bitswap network for Shwap protocol composition.
 func NewNetwork(host host.Host, prefix protocol.ID) network.BitSwapNetwork {
 	prefix = shwapProtocolID(prefix)
@@ -103,13 +121,16 @@ func NewClient(
 ) *client.Client {
 	opts := []client.Option{
 		client.SetSimulateDontHavesOnTimeout(simulateDontHaves),
-		client.ProviderSearchDelay(providerSearchDelay),
-		client.RebroadcastDelay(delay.Fixed(rebroadcastDelay)),
+		client.WithDontHaveTimeoutConfig(simulateDontHaveConfig),
+		client.WithDisabledMessageQueueRebroadcast(disablePerPeerRetries),
 		// Prevents Has calls to Blockstore for metric that counts duplicates
 		// Unnecessary for our use case, so we can save some disk lookups.
 		client.WithoutDuplicatedBlockStats(),
-		client.WithDontHaveTimeoutConfig(dontHaveTimeoutConfig),
-		client.WithDisabledMessageQueueRebroadcast(true),
+
+		// These two options have mixed up named. One should be another and vice versa.
+		// TODO(@Wondertan): Make an issue
+		client.ProviderSearchDelay(broadcastDelay),
+		client.RebroadcastDelay(delay.Fixed(provSearchDelay)),
 	}
 	return client.New(
 		ctx,
@@ -130,10 +151,10 @@ func NewServer(
 		server.ProvideEnabled(providesEnabled),
 		server.SetSendDontHaves(sendDontHaves),
 		server.MaxQueuedWantlistEntriesPerPeer(maxServerWantListsPerPeer),
-		server.WithTargetMessageSize(targetMessageSize),
-		server.MaxOutstandingBytesPerPeer(outstandingBytesPerPeer),
+		server.WithTargetMessageSize(targetResponseSize),
+		server.MaxOutstandingBytesPerPeer(maxWorkPerPeer),
 		server.WithWantHaveReplaceSize(replaceHasWithBlockMaxSize),
-		server.TaskWorkerCount(32),
+		server.TaskWorkerCount(responseWorkersCount),
 	}
 	return server.New(ctx, net, bstore, opts...)
 }