From 152d48468e27345ab40571c43833ecc72900b227 Mon Sep 17 00:00:00 2001
From: Josh Deprez <jd@buildkite.com>
Date: Tue, 6 Jun 2023 16:58:30 +1000
Subject: [PATCH 1/3] Lock library

The agent API exists not only to implement the lock subcommands, but to provide a service useful for agent internal locking purposes, build commands, hooks, and plugins. Some of that might be implemented in Go, therefore there should be a useful library available.

While writing the library, I recalled that the previous implementation does not handle double-releasing a lock appropriately - suppose `lock release` is called manually as well as on an EXIT trap, while another process acquires the same lock. To fix this, the lock package generates a token stored in the lock when acquired, and must match in order to unlock. This is printed by `lock acquire` and is now a required argument to `lock release`.
---
 clicommand/lock_acquire.go |  31 ++----
 clicommand/lock_common.go  |  20 +---
 clicommand/lock_do.go      |  55 +++-------
 clicommand/lock_done.go    |  14 +--
 clicommand/lock_get.go     |  10 +-
 clicommand/lock_release.go |  21 ++--
 lock/lock.go               | 218 +++++++++++++++++++++++++++++++++++++
 7 files changed, 260 insertions(+), 109 deletions(-)
 create mode 100644 lock/lock.go

diff --git a/clicommand/lock_acquire.go b/clicommand/lock_acquire.go
index c2e1d1003f..58a13a3aad 100644
--- a/clicommand/lock_acquire.go
+++ b/clicommand/lock_acquire.go
@@ -7,7 +7,7 @@ import (
 	"time"
 
 	"github.com/buildkite/agent/v3/cliconfig"
-	"github.com/buildkite/agent/v3/internal/agentapi"
+	"github.com/buildkite/agent/v3/lock"
 	"github.com/urfave/cli"
 )
 
@@ -23,9 +23,9 @@ Description:
 
 Examples:
 
-   $ buildkite-agent lock acquire llama
+   $ token=$(buildkite-agent lock acquire llama)
    $ critical_section()
-   $ buildkite-agent lock release llama
+   $ buildkite-agent lock release llama "${token}"
 
 `
 
@@ -87,27 +87,18 @@ func lockAcquireAction(c *cli.Context) error {
 		ctx = cctx
 	}
 
-	cli, err := agentapi.NewClient(ctx, agentapi.LeaderPath(cfg.SocketsPath))
+	cli, err := lock.NewClient(ctx, cfg.SocketsPath)
 	if err != nil {
 		fmt.Fprintf(c.App.ErrWriter, lockClientErrMessage, err)
 		os.Exit(1)
 	}
 
-	for {
-		_, done, err := cli.LockCompareAndSwap(ctx, key, "", "acquired")
-		if err != nil {
-			fmt.Fprintf(c.App.ErrWriter, "Error performing compare-and-swap: %v\n", err)
-			os.Exit(1)
-		}
-
-		if done {
-			return nil
-		}
-
-		// Not done.
-		if err := sleep(ctx, 100*time.Millisecond); err != nil {
-			fmt.Fprintf(c.App.ErrWriter, "Exceeded deadline or context cancelled: %v\n", err)
-			os.Exit(1)
-		}
+	token, err := cli.Lock(ctx, key)
+	if err != nil {
+		fmt.Fprintf(c.App.ErrWriter, "Could not acquire lock: %v\n", err)
+		os.Exit(1)
 	}
+
+	fmt.Fprintln(c.App.Writer, token)
+	return nil
 }
diff --git a/clicommand/lock_common.go b/clicommand/lock_common.go
index 6900c027c3..651d2288cc 100644
--- a/clicommand/lock_common.go
+++ b/clicommand/lock_common.go
@@ -1,11 +1,6 @@
 package clicommand
 
-import (
-	"context"
-	"time"
-
-	"github.com/urfave/cli"
-)
+import "github.com/urfave/cli"
 
 const lockClientErrMessage = `Could not connect to Agent API: %v
 This command can only be used when at least one agent is running with the
@@ -34,16 +29,3 @@ var lockCommonFlags = []cli.Flag{
 		EnvVar: "BUILDKITE_SOCKETS_PATH",
 	},
 }
-
-// sleep sleeps in a context-aware way. The only non-nil errors returned are
-// from ctx.Err.
-func sleep(ctx context.Context, d time.Duration) error {
-	t := time.NewTimer(d)
-	defer t.Stop()
-	select {
-	case <-t.C:
-		return nil
-	case <-ctx.Done():
-		return ctx.Err()
-	}
-}
diff --git a/clicommand/lock_do.go b/clicommand/lock_do.go
index 2c21416e2c..aaff25b0cf 100644
--- a/clicommand/lock_do.go
+++ b/clicommand/lock_do.go
@@ -7,7 +7,7 @@ import (
 	"time"
 
 	"github.com/buildkite/agent/v3/cliconfig"
-	"github.com/buildkite/agent/v3/internal/agentapi"
+	"github.com/buildkite/agent/v3/lock"
 	"github.com/urfave/cli"
 )
 
@@ -96,51 +96,22 @@ func lockDoAction(c *cli.Context) error {
 		ctx = cctx
 	}
 
-	cli, err := agentapi.NewClient(ctx, agentapi.LeaderPath(cfg.SocketsPath))
+	cli, err := lock.NewClient(ctx, cfg.SocketsPath)
 	if err != nil {
 		fmt.Fprintf(c.App.ErrWriter, lockClientErrMessage, err)
 		os.Exit(1)
 	}
 
-	for {
-		state, err := cli.LockGet(ctx, key)
-		if err != nil {
-			fmt.Fprintf(c.App.ErrWriter, "Error performing get: %v\n", err)
-			os.Exit(1)
-		}
-
-		switch state {
-		case "":
-			// Try to acquire the lock by changing to state 1
-			_, done, err := cli.LockCompareAndSwap(ctx, key, "", "doing")
-			if err != nil {
-				fmt.Fprintf(c.App.ErrWriter, "Error performing compare-and-swap: %v\n", err)
-				os.Exit(1)
-			}
-			if done {
-				// Lock acquired, exit 0.
-				fmt.Fprintln(c.App.Writer, "do")
-				return nil
-			}
-			// Lock not acquired (perhaps something else acquired it).
-			// Go through the loop again.
-
-		case "doing":
-			// Work in progress - wait until state 2.
-			if err := sleep(ctx, 100*time.Millisecond); err != nil {
-				fmt.Fprintf(c.App.ErrWriter, "Exceeded deadline or context cancelled: %v\n", err)
-				os.Exit(1)
-			}
-
-		case "done":
-			// Work completed!
-			fmt.Fprintln(c.App.Writer, "done")
-			return nil
-
-		default:
-			// Invalid state.
-			fmt.Fprintf(c.App.ErrWriter, "Lock in invalid state %q for do-once\n", state)
-			os.Exit(1)
-		}
+	do, err := cli.DoOnceStart(ctx, key)
+	if err != nil {
+		fmt.Fprintf(c.App.ErrWriter, "Couldn't start do-once lock: %v\n", err)
+		os.Exit(1)
+	}
+
+	if do {
+		fmt.Fprintln(c.App.Writer, "do")
+		return nil
 	}
+	fmt.Fprintln(c.App.Writer, "done")
+	return nil
 }
diff --git a/clicommand/lock_done.go b/clicommand/lock_done.go
index f8ccc45722..83286ccb46 100644
--- a/clicommand/lock_done.go
+++ b/clicommand/lock_done.go
@@ -6,7 +6,7 @@ import (
 	"os"
 
 	"github.com/buildkite/agent/v3/cliconfig"
-	"github.com/buildkite/agent/v3/internal/agentapi"
+	"github.com/buildkite/agent/v3/lock"
 	"github.com/urfave/cli"
 )
 
@@ -72,20 +72,14 @@ func lockDoneAction(c *cli.Context) error {
 
 	ctx := context.Background()
 
-	cli, err := agentapi.NewClient(ctx, agentapi.LeaderPath(cfg.SocketsPath))
+	cli, err := lock.NewClient(ctx, cfg.SocketsPath)
 	if err != nil {
 		fmt.Fprintf(c.App.ErrWriter, lockClientErrMessage, err)
 		os.Exit(1)
 	}
 
-	val, done, err := cli.LockCompareAndSwap(ctx, key, "doing", "done")
-	if err != nil {
-		fmt.Fprintf(c.App.ErrWriter, "Error performing compare-and-swap: %v\n", err)
-		os.Exit(1)
-	}
-
-	if !done {
-		fmt.Fprintf(c.App.ErrWriter, "Lock in invalid state %q to mark complete\n", val)
+	if err := cli.DoOnceEnd(ctx, key); err != nil {
+		fmt.Fprintf(c.App.ErrWriter, "Couldn't complete do-once lock: %v\n", err)
 		os.Exit(1)
 	}
 	return nil
diff --git a/clicommand/lock_get.go b/clicommand/lock_get.go
index d47c0c2c70..fbd4dfb162 100644
--- a/clicommand/lock_get.go
+++ b/clicommand/lock_get.go
@@ -6,13 +6,13 @@ import (
 	"os"
 
 	"github.com/buildkite/agent/v3/cliconfig"
-	"github.com/buildkite/agent/v3/internal/agentapi"
+	"github.com/buildkite/agent/v3/lock"
 	"github.com/urfave/cli"
 )
 
 const lockGetHelpDescription = `Usage:
 
-   buildkite-agent lock get [key]
+    buildkite-agent lock get [key]
 
 Description:
    Retrieves the value of a lock key. Any key not in use returns an empty 
@@ -73,15 +73,15 @@ func lockGetAction(c *cli.Context) error {
 
 	ctx := context.Background()
 
-	cli, err := agentapi.NewClient(ctx, agentapi.LeaderPath(cfg.SocketsPath))
+	cli, err := lock.NewClient(ctx, cfg.SocketsPath)
 	if err != nil {
 		fmt.Fprintf(c.App.ErrWriter, lockClientErrMessage, err)
 		os.Exit(1)
 	}
 
-	v, err := cli.LockGet(ctx, key)
+	v, err := cli.Get(ctx, key)
 	if err != nil {
-		fmt.Fprintf(c.App.ErrWriter, "Error from leader client: %v\n", err)
+		fmt.Fprintf(c.App.ErrWriter, "Couldn't get lock state: %v\n", err)
 		os.Exit(1)
 	}
 
diff --git a/clicommand/lock_release.go b/clicommand/lock_release.go
index 7f1474b120..99abf9f0c1 100644
--- a/clicommand/lock_release.go
+++ b/clicommand/lock_release.go
@@ -6,7 +6,7 @@ import (
 	"os"
 
 	"github.com/buildkite/agent/v3/cliconfig"
-	"github.com/buildkite/agent/v3/internal/agentapi"
+	"github.com/buildkite/agent/v3/lock"
 	"github.com/urfave/cli"
 )
 
@@ -20,9 +20,9 @@ Description:
 
 Examples:
 
-   $ buildkite-agent lock acquire llama
+   $ token=$(buildkite-agent lock acquire llama)
    $ critical_section()
-   $ buildkite-agent lock release llama
+   $ buildkite-agent lock release llama "${token}"
 
 `
 
@@ -41,11 +41,11 @@ var LockReleaseCommand = cli.Command{
 }
 
 func lockReleaseAction(c *cli.Context) error {
-	if c.NArg() != 1 {
+	if c.NArg() != 2 {
 		fmt.Fprint(c.App.ErrWriter, lockReleaseHelpDescription)
 		os.Exit(1)
 	}
-	key := c.Args()[0]
+	key, token := c.Args()[0], c.Args()[1]
 
 	// Load the configuration
 	cfg := LockReleaseConfig{}
@@ -70,21 +70,16 @@ func lockReleaseAction(c *cli.Context) error {
 
 	ctx := context.Background()
 
-	cli, err := agentapi.NewClient(ctx, agentapi.LeaderPath(cfg.SocketsPath))
+	cli, err := lock.NewClient(ctx, cfg.SocketsPath)
 	if err != nil {
 		fmt.Fprintf(c.App.ErrWriter, lockClientErrMessage, err)
 		os.Exit(1)
 	}
 
-	val, done, err := cli.LockCompareAndSwap(ctx, key, "acquired", "")
-	if err != nil {
-		fmt.Fprintf(c.App.ErrWriter, "Error performing compare-and-swap: %v\n", err)
+	if err := cli.Unlock(ctx, key, token); err != nil {
+		fmt.Fprintf(c.App.ErrWriter, "Could not release lock: %v\n", err)
 		os.Exit(1)
 	}
 
-	if !done {
-		fmt.Fprintf(c.App.ErrWriter, "Lock in invalid state %q to release\n", val)
-		os.Exit(1)
-	}
 	return nil
 }
diff --git a/lock/lock.go b/lock/lock.go
new file mode 100644
index 0000000000..922918d6e7
--- /dev/null
+++ b/lock/lock.go
@@ -0,0 +1,218 @@
+// Package lock provides a client for the Agent API locking service. This is
+// intended to be used both internally by the agent itself, as well as by
+// authors of binary hooks or other programs.
+package lock
+
+import (
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/buildkite/agent/v3/internal/agentapi"
+)
+
+// For local sockets, we can afford to be fairly chatty. 100ms is an arbitrary
+// choice which is simultaneously "a long time" (for a computer) and
+// "the blink of an eye" (for a human).
+const localSocketSleepDuration = 100 * time.Millisecond
+
+// Client implements a client library for the Agent API locking service.
+type Client struct {
+	client *agentapi.Client
+}
+
+// NewClient creates a new machine-scope lock service client.
+func NewClient(ctx context.Context, socketsDir string) (*Client, error) {
+	cli, err := agentapi.NewClient(ctx, agentapi.LeaderPath(socketsDir))
+	if err != nil {
+		return nil, err
+	}
+	return &Client{client: cli}, nil
+}
+
+// Get retrieves the current state of a lock.
+func (c *Client) Get(ctx context.Context, key string) (string, error) {
+	return c.client.LockGet(ctx, key)
+}
+
+// Locker returns a sync.Mutex-like object that uses the client to perform
+// locking. Any errors encountered by the client while locking or unlocking
+// (for example, the agent running the API stops running) will cause a panic
+// (because sync.Locker has no other way to report an error). For greater
+// flexibility, use Client's Lock and Unlock methods directly.
+func (c *Client) Locker(key string) sync.Locker {
+	return &locker{
+		client: c,
+		key:    key,
+	}
+}
+
+// Lock blocks until the lock for the given key is acquired. It returns a
+// token or an error. The token must be passed to Unlock in order to unlock the
+// lock later on.
+func (c *Client) Lock(ctx context.Context, key string) (string, error) {
+	otp := make([]byte, 16)
+	if _, err := rand.Read(otp); err != nil {
+		return "", err
+	}
+	token := fmt.Sprintf("acquired(pid=%d,otp=%x)", os.Getpid(), otp)
+
+	for {
+		_, done, err := c.client.LockCompareAndSwap(ctx, key, "", token)
+		if err != nil {
+			return "", fmt.Errorf("cas: %w", err)
+		}
+
+		if done {
+			return token, nil
+		}
+
+		// Not done.
+		if err := sleep(ctx, localSocketSleepDuration); err != nil {
+			return "", err
+		}
+	}
+}
+
+// Unlock unlocks the lock for the given key. To prevent different processes
+// accidentally unlocking the same lock, token must match the current lock value.
+func (c *Client) Unlock(ctx context.Context, key, token string) error {
+	val, done, err := c.client.LockCompareAndSwap(ctx, key, token, "")
+	if err != nil {
+		return fmt.Errorf("cas: %w", err)
+	}
+	if !done {
+		if val == "" {
+			return errors.New("already unlocked")
+		}
+		return fmt.Errorf("invalid lock state %q", val)
+	}
+	return nil
+}
+
+// DoOnce is similar to sync.Once. In the absence of an error, it does one of
+// two things:
+//   - Calls f, and returns when done.
+//   - Waits until another invocation with this key has completed, and
+//     then returns.
+//
+// Like sync.Once, if f panics, DoOnce considers it to have returned.
+func (c *Client) DoOnce(ctx context.Context, key string, f func()) (err error) {
+	do, err := c.DoOnceStart(ctx, key)
+	if err != nil {
+		return err
+	}
+	if !do {
+		// Already done
+		return nil
+	}
+
+	// To do like sync.Once on panic, we must mark it done from a defer.
+	defer func() {
+		err = c.DoOnceEnd(ctx, key)
+	}()
+
+	// Lock acquired, do the work.
+	f()
+	return nil
+}
+
+// DoOnceStart begins a do-once section. It reports if the operation to perform
+// exactly once should proceed.
+func (c *Client) DoOnceStart(ctx context.Context, key string) (bool, error) {
+	// The work could already be done, so start by getting the current state.
+	state, err := c.client.LockGet(ctx, key)
+	if err != nil {
+		return false, err
+	}
+	for {
+		switch state {
+		case "":
+			// Try to acquire the lock by transitioning to state "doing"
+			st, done, err := c.client.LockCompareAndSwap(ctx, key, "", "doing")
+			if err != nil {
+				return false, fmt.Errorf("cas: %w", err)
+			}
+			if !done {
+				// Lock not acquired (perhaps something else acquired it).
+				state = st
+				continue
+			}
+
+			return true, nil
+
+		case "doing":
+			// Work in progress - wait until state "done".
+			if err := sleep(ctx, localSocketSleepDuration); err != nil {
+				return false, err
+			}
+			st, err := c.client.LockGet(ctx, key)
+			if err != nil {
+				return false, err
+			}
+			state = st
+
+		case "done":
+			// Work already completed!
+			return false, nil
+
+		default:
+			// Invalid state.
+			return false, fmt.Errorf("invalid lock state %q", state)
+		}
+	}
+}
+
+// DoOnceEnd marks a do-once section as completed.
+func (c *Client) DoOnceEnd(ctx context.Context, key string) error {
+	st, done, err := c.client.LockCompareAndSwap(ctx, key, "doing", "done")
+	if err != nil {
+		return fmt.Errorf("cas: %w", err)
+	}
+	if !done {
+		return fmt.Errorf("invalid lock state %q", st)
+	}
+	return nil
+}
+
+type locker struct {
+	client     *Client
+	mu         sync.Mutex
+	key, token string
+}
+
+func (l *locker) Lock() {
+	l.mu.Lock()
+	// sync.Mutex waits forever for a lock, so we do the same thing (i.e. the
+	// background context).
+	token, err := l.client.Lock(context.Background(), l.key)
+	if err != nil {
+		panic(err)
+	}
+	l.token = token
+}
+
+func (l *locker) Unlock() {
+	if err := l.client.Unlock(context.Background(), l.key, l.token); err != nil {
+		panic(err)
+	}
+	l.token = ""
+	l.mu.Unlock()
+}
+
+// sleep sleeps in a context-aware way. The only non-nil errors returned are
+// from ctx.Err.
+func sleep(ctx context.Context, d time.Duration) error {
+	t := time.NewTimer(d)
+	defer t.Stop()
+	select {
+	case <-t.C:
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}

From f19bb4b65fb2fbfc248b1c397c6732ed00b1fcc1 Mon Sep 17 00:00:00 2001
From: Josh Deprez <jd@buildkite.com>
Date: Tue, 6 Jun 2023 17:05:21 +1000
Subject: [PATCH 2/3] Lock library: add tests

Also two minor fixes (t.Errorf instead of Fatalf in the logger; t.Cleanup instead of defer for svr.Close) in the agentapi tests I stole code from.
---
 internal/agentapi/client_server_test.go |   8 +-
 lock/lock_test.go                       | 144 ++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 4 deletions(-)
 create mode 100644 lock/lock_test.go

diff --git a/internal/agentapi/client_server_test.go b/internal/agentapi/client_server_test.go
index 55492fa02f..251e6d7016 100644
--- a/internal/agentapi/client_server_test.go
+++ b/internal/agentapi/client_server_test.go
@@ -16,14 +16,14 @@ var testSocketCounter uint32
 
 func testSocketPath() string {
 	id := atomic.AddUint32(&testSocketCounter, 1)
-	return filepath.Join(os.TempDir(), fmt.Sprintf("test-%d-%d", os.Getpid(), id))
+	return filepath.Join(os.TempDir(), fmt.Sprintf("internal_agentapi_test-%d-%d", os.Getpid(), id))
 }
 
 func testLogger(t *testing.T) logger.Logger {
 	t.Helper()
 	logger := logger.NewConsoleLogger(
 		logger.NewTextPrinter(os.Stderr),
-		func(c int) { t.Fatalf("exit(%d)", c) },
+		func(c int) { t.Errorf("exit(%d)", c) },
 	)
 	return logger
 }
@@ -53,7 +53,7 @@ func TestPing(t *testing.T) {
 	t.Cleanup(canc)
 
 	svr, cli := testServerAndClient(t, ctx)
-	defer svr.Close()
+	t.Cleanup(func() { svr.Close() })
 
 	if err := cli.Ping(ctx); err != nil {
 		t.Errorf("cli.Ping(ctx) = %v", err)
@@ -66,7 +66,7 @@ func TestLockOperations(t *testing.T) {
 	t.Cleanup(canc)
 
 	svr, cli := testServerAndClient(t, ctx)
-	defer svr.Close()
+	t.Cleanup(func() { svr.Close() })
 
 	const key = "llama"
 
diff --git a/lock/lock_test.go b/lock/lock_test.go
new file mode 100644
index 0000000000..a7b5d34f3a
--- /dev/null
+++ b/lock/lock_test.go
@@ -0,0 +1,144 @@
+package lock
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/buildkite/agent/v3/internal/agentapi"
+	"github.com/buildkite/agent/v3/logger"
+)
+
+var testSocketCounter uint32
+
+func testSocketPath() string {
+	id := atomic.AddUint32(&testSocketCounter, 1)
+	return filepath.Join(os.TempDir(), fmt.Sprintf("lock_test-%d-%d", os.Getpid(), id))
+}
+
+func testLogger(t *testing.T) logger.Logger {
+	t.Helper()
+	logger := logger.NewConsoleLogger(
+		logger.NewTextPrinter(os.Stderr),
+		func(c int) { t.Errorf("exit(%d)", c) },
+	)
+	return logger
+}
+
+func testServerAndClient(t *testing.T, ctx context.Context) (*agentapi.Server, *Client) {
+	t.Helper()
+	sockPath, logger := testSocketPath(), testLogger(t)
+	svr, err := agentapi.NewServer(sockPath, logger)
+	if err != nil {
+		t.Fatalf("NewServer(%q, logger) = error %v", sockPath, err)
+	}
+	if err := svr.Start(); err != nil {
+		t.Fatalf("svr.Start() = %v", err)
+	}
+
+	cli, err := agentapi.NewClient(ctx, sockPath)
+	if err != nil {
+		t.Fatalf("NewClient(ctx, %q) = error %v", sockPath, err)
+	}
+
+	// lock.NewClient takes the socket *directory*. Rather than temporarily
+	// symlink the socket created above, I've manually created a client.
+	return svr, &Client{client: cli}
+}
+
+func TestLockUnlock(t *testing.T) {
+	t.Parallel()
+	ctx, canc := context.WithTimeout(context.Background(), 10*time.Second)
+	t.Cleanup(canc)
+
+	svr, cli := testServerAndClient(t, ctx)
+	t.Cleanup(func() { svr.Close() })
+
+	// Lock it
+	token, err := cli.Lock(ctx, "llama")
+	if err != nil {
+		t.Errorf("Client.Lock(ctx, llama) error = %v", err)
+	}
+	if token == "" {
+		t.Errorf("Client.Lock(ctx, llama) = %q, want non-empty token", token)
+	}
+
+	// Try unlocking with the wrong token
+	if err := cli.Unlock(ctx, "llama", "wrong token"); err == nil {
+		t.Errorf("Client.Unlock(ctx, llama, wrong token) = %v, want non-nil error", err)
+	}
+
+	// Unlock with the correct token
+	if err := cli.Unlock(ctx, "llama", token); err != nil {
+		t.Errorf("Client.Unlock(ctx, llama, %q) = %v, want nil", token, err)
+	}
+
+	// Unlocking it again, even with the right token, should fail
+	if err := cli.Unlock(ctx, "llama", token); err == nil {
+		t.Errorf("Client.Unlock(ctx, llama, %q) = %v, want non-nil error", token, err)
+	}
+}
+
+func TestLocker(t *testing.T) {
+	t.Parallel()
+	ctx, canc := context.WithTimeout(context.Background(), 10*time.Second)
+	t.Cleanup(canc)
+
+	svr, cli := testServerAndClient(t, ctx)
+	t.Cleanup(func() { svr.Close() })
+
+	// This constitutes a test by virtue of Lock/Unlock panicking on any
+	// internal error.
+	l := cli.Locker("llama")
+
+	var wg sync.WaitGroup
+	var locks int
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		go func() {
+			l.Lock()
+			locks++
+			l.Unlock()
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+
+	if got, want := locks, 10; got != want {
+		t.Errorf("locks = %d, want %d", got, want)
+	}
+}
+
+func TestDoOnce(t *testing.T) {
+	t.Parallel()
+	ctx, canc := context.WithTimeout(context.Background(), 10*time.Second)
+	t.Cleanup(canc)
+
+	svr, cli := testServerAndClient(t, ctx)
+	t.Cleanup(func() { svr.Close() })
+
+	var wg sync.WaitGroup
+	var calls atomic.Int32
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		go func() {
+			if err := cli.DoOnce(ctx, "once", func() {
+				calls.Add(1)
+			}); err != nil {
+				t.Errorf("Client.DoOnce(ctx, once, inc) = %v", err)
+			}
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+	if got, want := calls.Load(), int32(1); got != want {
+		t.Errorf("calls.Load() = %d, want %d", got, want)
+	}
+}

From a3727b318cc310626c668a22c86ef3b07de5f904 Mon Sep 17 00:00:00 2001
From: Josh Deprez <jd@buildkite.com>
Date: Thu, 8 Jun 2023 13:50:22 +1000
Subject: [PATCH 3/3] Locking: add comments and expand help text

---
 clicommand/lock_acquire.go | 6 ++++++
 clicommand/lock_do.go      | 3 +++
 clicommand/lock_done.go    | 3 +++
 clicommand/lock_get.go     | 5 ++++-
 clicommand/lock_release.go | 9 +++++++--
 lock/lock.go               | 4 ++++
 6 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/clicommand/lock_acquire.go b/clicommand/lock_acquire.go
index 58a13a3aad..e515ea80f9 100644
--- a/clicommand/lock_acquire.go
+++ b/clicommand/lock_acquire.go
@@ -20,6 +20,12 @@ Description:
    forever) until it can acquire the lock, if the lock is already held by
    another process. If multiple processes are waiting for the same lock, there
    is no ordering guarantee of which one will be given the lock next.
+   
+   To prevent separate processes unlocking each other, the output from ′lock
+   acquire′ should be stored, and passed to ′lock release′.
+   
+   Note that this subcommand is only available when an agent has been started
+   with the ′agent-api′ experiment enabled.
 
 Examples:
 
diff --git a/clicommand/lock_do.go b/clicommand/lock_do.go
index aaff25b0cf..5c0811eae5 100644
--- a/clicommand/lock_do.go
+++ b/clicommand/lock_do.go
@@ -20,6 +20,9 @@ Description:
    wait for completion of some shared work, where only one process should do
    the work. 
    
+   Note that this subcommand is only available when an agent has been started
+   with the ′agent-api′ experiment enabled.
+   
    ′lock do′ will do one of two things:
    
    - Print 'do'. The calling process should proceed to do the work and then
diff --git a/clicommand/lock_done.go b/clicommand/lock_done.go
index 83286ccb46..08c7c0335d 100644
--- a/clicommand/lock_done.go
+++ b/clicommand/lock_done.go
@@ -17,6 +17,9 @@ const lockDoneHelpDescription = `Usage:
 Description:
    Completes a do-once lock. This should only be used by the process performing
    the work.
+   
+   Note that this subcommand is only available when an agent has been started
+   with the ′agent-api′ experiment enabled.
 
 Examples:
 
diff --git a/clicommand/lock_get.go b/clicommand/lock_get.go
index fbd4dfb162..a15d5467c9 100644
--- a/clicommand/lock_get.go
+++ b/clicommand/lock_get.go
@@ -12,12 +12,15 @@ import (
 
 const lockGetHelpDescription = `Usage:
 
-    buildkite-agent lock get [key]
+   buildkite-agent lock get [key]
 
 Description:
    Retrieves the value of a lock key. Any key not in use returns an empty 
    string.
    
+   Note that this subcommand is only available when an agent has been started
+   with the ′agent-api′ experiment enabled.
+   
    ′lock get′ is generally only useful for inspecting lock state, as the value
    can change concurrently. To acquire or release a lock, use ′lock acquire′ and
    ′lock release′.
diff --git a/clicommand/lock_release.go b/clicommand/lock_release.go
index 99abf9f0c1..24cea89b60 100644
--- a/clicommand/lock_release.go
+++ b/clicommand/lock_release.go
@@ -12,11 +12,16 @@ import (
 
 const lockReleaseHelpDescription = `Usage:
 
-   buildkite-agent lock release [key]
+   buildkite-agent lock release [key] [token]
 
 Description:
    Releases the lock for the given key. This should only be called by the
-   process that acquired the lock.
+   process that acquired the lock. To help prevent different processes unlocking
+   each other unintentionally, the output from ′lock acquire′ is required as the
+   second argument.
+   
+   Note that this subcommand is only available when an agent has been started
+   with the ′agent-api′ experiment enabled.
 
 Examples:
 
diff --git a/lock/lock.go b/lock/lock.go
index 922918d6e7..7ccbd389f2 100644
--- a/lock/lock.go
+++ b/lock/lock.go
@@ -55,6 +55,10 @@ func (c *Client) Locker(key string) sync.Locker {
 // token or an error. The token must be passed to Unlock in order to unlock the
 // lock later on.
 func (c *Client) Lock(ctx context.Context, key string) (string, error) {
+	// The token generation only has to avoid making the same token twice to
+	// prevent separate processes unlocking each other.
+	// Using crypto/rand to generate 16 bytes is possibly overkill - it's not a
+	// goal to be cryptographically secure - but ensures the result.
 	otp := make([]byte, 16)
 	if _, err := rand.Read(otp); err != nil {
 		return "", err