Skip to content

Commit fc63483

Browse files
chore: Queue/Trigger the replay using sloctl [PC-13394] (#237)
## Motivation We're adding a new way to create a Replay. The new Replay is added to the queue and there's no need to wait for the reimport to complete. ## Related changes A new way of creating a Replay. Because the number of ongoing Replays is limited, `sloctl` queues Replays if the limit is exceeded. Replay queues are an experimental feature, not yet available to all organizations. We're working on improving and expanding its availability. --------- Co-authored-by: paulina-n9 <[email protected]>
1 parent 88b21b3 commit fc63483

File tree

3 files changed

+90
-37
lines changed

3 files changed

+90
-37
lines changed

internal/replay.go

+87-34
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,11 @@ func (r *RootCmd) NewReplayCmd() *cobra.Command {
4646
cmd := &cobra.Command{
4747
Use: "replay",
4848
Short: "Retrieve historical SLI data and recalculate their SLO error budgets.",
49-
Long: "Replay pulls in the historical data while your SLO collects new data in real-time. " +
50-
"The historical and current data are merged, producing an error budget calculated for the entire period. " +
51-
"Refer to https://docs.nobl9.com/replay for more details on Replay.\n\n" +
52-
"The 'replay' command allows you to import data for multiple SLOs in bulk. " +
53-
"Before running the Replays it will verify if the SLOs you've provided are eligible for Replay. " +
54-
"It will only run a single Replay simultaneously (current limit for concurrent Replays). " +
55-
"When any Replay fails, it will attempt the import for the next SLO. " +
56-
"Importing data takes time: Replay for a single SLO may take several minutes up to an hour. " +
57-
"During that time, the command keeps on running, periodically checking the status of Replay. " +
58-
"If you cancel the program execution at any time, the current Replay in progress will not be revoked.",
49+
Long: "`sloctl replay` creates Replays to retrieve historical data for SLOs. " +
50+
"Use it to replay SLOs one-by-one or in bulk. Historical data retrieval is time-consuming: " +
51+
"replaying a single SLO can take up to an hour. Considering the number of ongoing Replays is limited, " +
52+
"`sloctl` queues Replays if the limit is exceeded. Replay queues is an experimental feature, currently " +
53+
"unavailable to all organizations. We're working on improving and expanding its availability.",
5954
Example: replayExample,
6055
Args: replay.arguments,
6156
PersistentPreRun: func(cmd *cobra.Command, args []string) {
@@ -95,31 +90,73 @@ func (r *ReplayCmd) RunReplays(cmd *cobra.Command, replays []ReplayConfig) (fail
9590
return 0, err
9691
}
9792

93+
arePlaylistEnabled := r.arePlaylistEnabled(cmd.Context())
94+
95+
if arePlaylistEnabled {
96+
cmd.Println(colorstring.Color("[yellow]- Your organization has access to Replay queues!"))
97+
cmd.Println(colorstring.Color("[yellow]- To learn more about Replay queues, follow this link: " +
98+
"https://docs.nobl9.com/replay-canary/ [reset]"))
99+
}
100+
98101
failedIndexes := make([]int, 0)
99102
for i, replay := range replays {
100103
cmd.Println(colorstring.Color(fmt.Sprintf(
101104
"[cyan][%d/%d][reset] SLO: %s, Project: %s, From: %s, To: %s",
102105
i+1, len(replays), replay.SLO, replay.Project,
103106
replay.From.Format(timeLayout), time.Now().In(replay.From.Location()).Format(timeLayout))))
104107

105-
spinner := NewSpinner("Importing data...")
106-
spinner.Go()
107-
err = r.runReplay(cmd.Context(), replay)
108-
spinner.Stop()
108+
if arePlaylistEnabled {
109+
cmd.Println("Replay is added to the queue...")
110+
err = r.runReplay(cmd.Context(), replay)
109111

110-
if err != nil {
111-
cmd.Println(colorstring.Color("[red]Import failed:[reset] " + err.Error()))
112-
failedIndexes = append(failedIndexes, i)
113-
continue
112+
if err != nil {
113+
cmd.Println(colorstring.Color("[red]Failed to add Replay to the queue:[reset] " + err.Error()))
114+
failedIndexes = append(failedIndexes, i)
115+
continue
116+
}
117+
cmd.Println(colorstring.Color("[green]Replay has been successfully added to the queue![reset]"))
118+
} else {
119+
spinner := NewSpinner("Importing data...")
120+
spinner.Go()
121+
err = r.runReplayWithStatusCheck(cmd.Context(), replay)
122+
spinner.Stop()
123+
124+
if err != nil {
125+
cmd.Println(colorstring.Color("[red]Import failed:[reset] " + err.Error()))
126+
failedIndexes = append(failedIndexes, i)
127+
continue
128+
}
129+
cmd.Println(colorstring.Color("[green]Import succeeded![reset]"))
114130
}
115-
cmd.Println(colorstring.Color("[green]Import succeeded![reset]"))
116131
}
117132
if len(replays) > 0 {
118133
r.printSummary(cmd, replays, failedIndexes)
119134
}
120135
return len(failedIndexes), nil
121136
}
122137

138+
func (r *ReplayCmd) arePlaylistEnabled(ctx context.Context) bool {
139+
data, _, err := r.doRequest(
140+
ctx,
141+
http.MethodGet,
142+
endpointPlanInfo,
143+
"*",
144+
nil,
145+
nil)
146+
if err != nil {
147+
return true
148+
}
149+
var pc PlaylistConfiguration
150+
if err = json.Unmarshal(data, &pc); err != nil {
151+
return true
152+
}
153+
return pc.EnabledPlaylists
154+
}
155+
156+
type PlaylistConfiguration struct {
157+
EnabledPlaylists bool `json:"enabledPlaylists"`
158+
}
159+
123160
type ReplayConfig struct {
124161
Project string `json:"project" validate:"required"`
125162
SLO string `json:"slo" validate:"required"`
@@ -264,7 +301,7 @@ func (r *ReplayCmd) verifySLOs(ctx context.Context, replays []ReplayConfig) erro
264301

265302
// Find non-existent or RBAC protected SLOs.
266303
// We're also filling the Data Source spec here for ReplayConfig.
267-
data, err := r.doRequest(
304+
data, _, err := r.doRequest(
268305
ctx,
269306
http.MethodGet,
270307
endpointGetSLO,
@@ -352,10 +389,10 @@ outer:
352389

353390
const replayStatusCheckInterval = 30 * time.Second
354391

355-
func (r *ReplayCmd) runReplay(ctx context.Context, config ReplayConfig) error {
356-
_, err := r.doRequest(ctx, http.MethodPost, endpointReplayPost, config.Project, nil, config.ToReplay(time.Now()))
392+
func (r *ReplayCmd) runReplayWithStatusCheck(ctx context.Context, config ReplayConfig) error {
393+
err := r.runReplay(ctx, config)
357394
if err != nil {
358-
return errors.Wrap(err, "failed to start new Replay")
395+
return err
359396
}
360397
ticker := time.NewTicker(replayStatusCheckInterval)
361398
for {
@@ -379,6 +416,21 @@ func (r *ReplayCmd) runReplay(ctx context.Context, config ReplayConfig) error {
379416
}
380417
}
381418

419+
func (r *ReplayCmd) runReplay(ctx context.Context, config ReplayConfig) error {
420+
_, httpCode, err := r.doRequest(ctx, http.MethodPost, endpointReplayPost, config.Project,
421+
nil, config.ToReplay(time.Now()),
422+
)
423+
if err != nil {
424+
switch httpCode {
425+
case 409:
426+
return errors.Errorf("Replay for SLO: '%s' in project: '%s' already exist", config.SLO, config.Project)
427+
default:
428+
return errors.Wrap(err, "failed to start new Replay")
429+
}
430+
}
431+
return nil
432+
}
433+
382434
func (r *ReplayCmd) getReplayAvailability(
383435
ctx context.Context,
384436
config ReplayConfig,
@@ -392,7 +444,7 @@ func (r *ReplayCmd) getReplayAvailability(
392444
"durationUnit": {durationUnit},
393445
"durationValue": {strconv.Itoa(durationValue)},
394446
}
395-
data, err := r.doRequest(ctx, http.MethodGet, endpointReplayGetAvailability, config.Project, values, nil)
447+
data, _, err := r.doRequest(ctx, http.MethodGet, endpointReplayGetAvailability, config.Project, values, nil)
396448
if err != nil {
397449
return
398450
}
@@ -406,7 +458,7 @@ func (r *ReplayCmd) getReplayStatus(
406458
ctx context.Context,
407459
config ReplayConfig,
408460
) (string, error) {
409-
data, err := r.doRequest(
461+
data, _, err := r.doRequest(
410462
ctx,
411463
http.MethodGet,
412464
fmt.Sprintf(endpointReplayGetStatus, config.SLO),
@@ -429,6 +481,7 @@ const (
429481
endpointReplayList = "/timetravel/list"
430482
endpointReplayGetStatus = "/timetravel/%s"
431483
endpointReplayGetAvailability = "/internal/timemachine/availability"
484+
endpointPlanInfo = "/internal/plan-info"
432485
endpointGetSLO = "/get/slo"
433486
)
434487

@@ -437,30 +490,30 @@ func (r *ReplayCmd) doRequest(
437490
method, endpoint, project string,
438491
values url.Values,
439492
payload interface{},
440-
) ([]byte, error) {
493+
) (data []byte, httpCode int, err error) {
441494
var body io.Reader
442495
if payload != nil {
443496
buf := new(bytes.Buffer)
444497
if err := json.NewEncoder(buf).Encode(payload); err != nil {
445-
return nil, err
498+
return nil, 0, err
446499
}
447500
body = buf
448501
}
449502
header := http.Header{sdk.HeaderProject: []string{project}}
450503
req, err := r.client.CreateRequest(ctx, method, endpoint, header, values, body)
451504
if err != nil {
452-
return nil, err
505+
return nil, 0, err
453506
}
454507
resp, err := r.client.HTTP.Do(req)
455508
if err != nil {
456-
return nil, err
509+
return nil, 0, err
457510
}
458511
defer func() { _ = resp.Body.Close() }()
512+
data, err = io.ReadAll(resp.Body)
459513
if resp.StatusCode >= 300 {
460-
data, _ := io.ReadAll(resp.Body)
461-
return nil, errors.Errorf("bad response (status: %d): %s", resp.StatusCode, string(data))
514+
return nil, resp.StatusCode, errors.Errorf("bad response (status: %d): %s", resp.StatusCode, string(data))
462515
}
463-
return io.ReadAll(resp.Body)
516+
return data, resp.StatusCode, err
464517
}
465518

466519
func (r *ReplayCmd) replayUnavailabilityReasonExplanation(
@@ -502,14 +555,14 @@ func (r *ReplayCmd) replayUnavailabilityReasonExplanation(
502555

503556
func (r *ReplayCmd) printSummary(cmd *cobra.Command, replays []ReplayConfig, failedIndexes []int) {
504557
if len(failedIndexes) == 0 {
505-
cmd.Printf("\nSuccessfully imported data for all %d SLOs.\n", len(replays))
558+
cmd.Printf("\nSuccessfully finished operations for all %d SLOs.\n", len(replays))
506559
} else {
507560
failedDetails := make([]string, 0, len(failedIndexes))
508561
for _, i := range failedIndexes {
509562
fr, _ := json.Marshal(replays[i])
510563
failedDetails = append(failedDetails, string(fr))
511564
}
512-
cmd.Printf("\nSuccessfully imported data for %d and failed for %d SLOs:\n - %s\n",
565+
cmd.Printf("\nSuccessfully finished operations for %d and failed for %d SLOs:\n - %s\n",
513566
len(replays)-len(failedIndexes), len(failedIndexes), strings.Join(failedDetails, "\n - "))
514567
}
515568
}

internal/replay_delete.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ type deleteReplayRequest struct {
5656
func (r *ReplayCmd) deleteAllReplays(cmd *cobra.Command) error {
5757
cmd.Println(colorstring.Color("[yellow]Deleting all queued Replays[reset]"))
5858

59-
_, err := r.doRequest(
59+
_, _, err := r.doRequest(
6060
cmd.Context(),
6161
http.MethodDelete,
6262
endpointReplayDelete,
@@ -87,7 +87,7 @@ func (r *ReplayCmd) deleteReplaysForSLO(cmd *cobra.Command, sloName string) erro
8787
),
8888
)
8989

90-
_, err := r.doRequest(
90+
_, _, err := r.doRequest(
9191
cmd.Context(),
9292
http.MethodDelete,
9393
endpointReplayDelete,

internal/replay_list.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ type ReplayQueueItem struct {
3636
func (r *ReplayCmd) listAllReplays(cmd *cobra.Command) error {
3737
cmd.Println(colorstring.Color("[yellow]Listing all Replays[reset]"))
3838

39-
response, err := r.doRequest(
39+
response, _, err := r.doRequest(
4040
cmd.Context(),
4141
http.MethodGet,
4242
endpointReplayList,

0 commit comments

Comments
 (0)