Skip to content

Commit f3752b5

Browse files
authored
Adds repair command, updates theme (#15)
- Adds repair to CLI docs Closes #13 Also: - Adds makefile & staging script - Updates theme to latest version.
1 parent f8cd23f commit f3752b5

File tree

9 files changed

+400
-2
lines changed

9 files changed

+400
-2
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ public
66
.vscode
77
node_modules
88
**/.DS_Store
9-
package-lock.json
9+
package-lock.json
10+
staging.env

Makefile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Default target: print a short summary of the available targets.
help:
	@echo "Run 'make build' to build the docs, or 'make stage' to stage the docs."
4+
5+
.PHONY: help build stage
6+
7+
# Build the docs by running hugo.
# The message is single-quoted so the shell prints the backticks literally
# instead of treating `public/` as a command substitution.
build:
	@echo 'Building docs. Output goes to `public/`'
	@hugo
11+
12+
# Stage the docs by running the stage.sh script from the current directory.
stage:
	@./stage.sh

assets/tools/repair.sh

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env bash
2+
#
3+
# This file is part of MinIO DirectPV
4+
# Copyright (c) 2024 MinIO, Inc.
5+
#
6+
# This program is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU Affero General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU Affero General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU Affero General Public License
17+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
#
20+
# This script repairs faulty drives
21+
#
22+
23+
# Abort the script as soon as an unguarded command exits with a non-zero status.
set -e
24+
25+
# Script name without its directory part; shown in the usage text printed by init.
ME=$(basename "$0"); export ME
26+
27+
# Drive IDs to repair: filled by init from the command line, consumed by main.
declare -a drive_ids
28+
29+
# usage: is_uuid <value>
30+
# Succeeds (status 0) when $1 is a lowercase, hyphen-separated UUID made of
# 8-4-4-4-12 hexadecimal digits; fails otherwise.
function is_uuid() {
    local hex='[0-9a-f]'
    local pattern="^${hex}{8}-${hex}{4}-${hex}{4}-${hex}{4}-${hex}{12}\$"

    if [[ "$1" =~ ${pattern} ]]; then
        return 0
    fi
    return 1
}
33+
34+
# usage: get_suspend_value <drive-id>
35+
# Prints the value of the "directpv.min.io/suspend" label of a drive.
# Prints nothing when the drive carries no such label.
#
# Arguments: $1 - drive ID
# Outputs:   the label value on stdout
#
# The argument is a drive ID, so the lookup must target the drive resource
# (directpvdrives); querying directpvvolumes with a drive ID cannot find the
# drive's suspend label.
function get_suspend_value() {
    # shellcheck disable=SC2016
    kubectl get directpvdrives "${1}" \
        -o go-template='{{range $k,$v := .metadata.labels}}{{if eq $k "directpv.min.io/suspend"}}{{$v}}{{end}}{{end}}'
}
40+
41+
# usage: is_suspended <drive-id>
42+
# Succeeds (status 0) when the suspend value reported by get_suspend_value for
# the given drive equals "true", compared case-insensitively.
#
# Arguments: $1 - drive ID
function is_suspended() {
    # Keep the scratch variable local so it does not leak into the caller.
    local value
    value=$(get_suspend_value "${1}")
    [[ "${value,,}" = "true" ]]
}
46+
47+
# usage: get_volumes <drive-id>
48+
# Prints the names of all directpvvolumes objects whose
# "directpv.min.io/drive" label equals the given drive ID.
# Each name is followed by a single space.
#
# Arguments: $1 - drive ID
function get_volumes() {
    local drive_selector="directpv.min.io/drive=${1}"
    local names_template='{{range .items}}{{.metadata.name}}{{ " " | print }}{{end}}'

    kubectl get directpvvolumes --selector="${drive_selector}" -o go-template="${names_template}"
}
53+
54+
# usage: get_pod_name <volume-id>
55+
# Prints the value of the "directpv.min.io/pod.name" label of a volume.
# Prints nothing when the volume carries no such label.
#
# Arguments: $1 - volume ID
function get_pod_name() {
    local volume_id="${1}"
    # shellcheck disable=SC2016
    local label_template='{{range $k,$v := .metadata.labels}}{{if eq $k "directpv.min.io/pod.name"}}{{$v}}{{end}}{{end}}'

    kubectl get directpvvolumes "${volume_id}" -o go-template="${label_template}"
}
60+
61+
# usage: get_pod_namespace <volume-id>
62+
# Prints the value of the "directpv.min.io/pod.namespace" label of a volume.
# Prints nothing when the volume carries no such label.
#
# Arguments: $1 - volume ID
function get_pod_namespace() {
    local volume_id="${1}"
    # shellcheck disable=SC2016
    local label_template='{{range $k,$v := .metadata.labels}}{{if eq $k "directpv.min.io/pod.namespace"}}{{$v}}{{end}}{{end}}'

    kubectl get directpvvolumes "${volume_id}" -o go-template="${label_template}"
}
67+
68+
# Validates the environment and the command-line arguments, then stores the
# unique drive IDs to repair in the global drive_ids array.
#
# Globals:   ME (read), drive_ids (appended to)
# Arguments: one or more drive IDs
# Outputs:   usage text on stdout when called without arguments;
#            error messages on stderr
# Exits:     255 when no argument is given, when kubectl or the directpv
#            plugin cannot be run, or when an argument is not a valid UUID
function init() {
    local drive known seen

    if [[ $# -eq 0 ]]; then
        cat <<EOF
NAME:
${ME} - This script repairs faulty drives.
USAGE:
${ME} <DRIVE-ID> ...
ARGUMENTS:
DRIVE-ID Faulty drive ID.
EXAMPLE:
# Repair drive af3b8b4c-73b4-4a74-84b7-1ec30492a6f0.
$ ${ME} af3b8b4c-73b4-4a74-84b7-1ec30492a6f0
EOF
        exit 255
    fi

    # 'command -v' is a shell builtin; unlike 'which' it is always available
    # and behaves the same everywhere.
    if ! command -v kubectl >/dev/null 2>&1; then
        echo "kubectl not found; please install" >&2
        exit 255
    fi

    if ! kubectl directpv --version >/dev/null 2>&1; then
        echo "kubectl directpv not found; please install" >&2
        exit 255
    fi

    for drive in "$@"; do
        if ! is_uuid "${drive}"; then
            echo "invalid drive ID ${drive}" >&2
            exit 255
        fi

        # Skip IDs that were already given; compare whole elements exactly
        # rather than regex-matching against the joined array.
        seen=false
        for known in "${drive_ids[@]}"; do
            if [[ "${known}" == "${drive}" ]]; then
                seen=true
                break
            fi
        done
        if [[ "${seen}" == "false" ]]; then
            drive_ids+=( "${drive}" )
        fi
    done
}
104+
105+
# usage: repair <drive-id>
106+
# Repairs a single drive.
#
# If the drive is not yet suspended, it is suspended first and every pod
# recorded on the drive's volumes is deleted. 'kubectl directpv repair' is run
# only when all of those pod deletions succeeded. For a drive that is already
# suspended, the repair is started directly.
#
# Arguments: $1 - drive ID
# Outputs:   progress messages on stdout; failure diagnostics on stderr
function repair() {
    local drive_id="$1"
    local pods_deleted=true
    local volume pod_name pod_namespace volume_list
    local -a volumes=()

    if ! is_suspended "${drive_id}"; then
        kubectl directpv suspend "${drive_id}"

        # Capture first, then split: a failing get_volumes still surfaces as a
        # failed assignment, and volume names are never glob-expanded.
        volume_list=$(get_volumes "${drive_id}")
        read -r -a volumes <<<"${volume_list//$'\n'/ }"

        for volume in "${volumes[@]}"; do
            pod_name=$(get_pod_name "${volume}")
            pod_namespace=$(get_pod_namespace "${volume}")

            if ! kubectl delete pod "${pod_name}" --namespace "${pod_namespace}"; then
                echo "unable to delete pod '${pod_name}' using volume '${volume}'; please delete the pod manually" >&2
                pods_deleted=false
            fi
        done
    else
        echo "drive ${drive_id} already suspended"
    fi

    if [[ "${pods_deleted}" == "true" ]]; then
        kubectl directpv repair "${drive_id}"
    else
        echo "delete pods manually and retry again for drive ${drive_id}" >&2
    fi
}
134+
135+
# Runs repair for every drive ID collected in the global drive_ids array,
# in order.
#
# Globals: drive_ids (read)
function main() {
    # Local loop variable so a global 'drive' is not overwritten.
    local drive
    for drive in "${drive_ids[@]}"; do
        repair "${drive}"
    done
}
140+
141+
init "$@"
142+
main "$@"

content/command-line/_index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ The available commands include:
6767
| [`label volumes`](label-volumes.md) | Set labels to volumes |
6868
| [`list-drives`](list-drives.md) | List drives |
6969
| [`list-volumes`](list-volumes.md) | List volumes |
70+
| [`repair`](repair.md) | Repair faulty XFS-formatted drives |
7071
| [`resume-drives`](resume-drives.md) | Resume suspended drives |
7172
| [`resume-volumes`](resume-volumes.md) | Resume suspended volumes |
7273
| [`suspend-drives`](suspend-drives.md) | Suspend drives |

content/command-line/repair.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
---
2+
title: repair
3+
date: 2024-08-26
4+
lastmod: :git
5+
draft: false
6+
tableOfContents: true
7+
---
8+
9+
## Description
10+
11+
{{< admonition title="Irrevocable Data Loss" type="warning" >}}
12+
This command completely and irreversibly erases any data that may exist on the selected drive(s).
13+
{{< /admonition >}}
14+
15+
DirectPV supports using the `xfs_repair` utility to attempt to repair managed drives that report XFS filesystem errors, faults, or corruption.
16+
This command has no guarantee of success or complete recovery.
17+
18+
The `repair` command creates a one-time Kubernetes `Job` with the pod name `repair-<DRIVE-ID>`.
19+
Kubernetes automatically removes this job five minutes after completion.
20+
21+
You can view the progress and status of the drive repair using the `kubectl logs` command.
22+
23+
Before beginning a repair, you must first [suspend the drive]({{< relref "/command-line/suspend-drives.md" >}}).
24+
25+
To retrieve the ID of the drive to repair, use the [list drives]({{< relref "/command-line/list-drives.md" >}}) command.
26+
27+
## Syntax
28+
29+
```sh
30+
kubectl directpv repair DRIVE [flags]
31+
```
32+
33+
## Parameters
34+
35+
### Flags
36+
37+
| **Flag** | **Description** |
38+
|---------------------------|-------------------------------------------------------------------------|
39+
| `--dry-run` | See the output of the command without actually changing any drives. |
40+
| `--force` | Force log zeroing. |
41+
| `--disable-prefetch` | Disable prefetching of inode and directory blocks. |
42+
43+
### Global Flags
44+
45+
You can use the following global DirectPV flags with `kubectl directpv repair`:
46+
47+
| **Flag** | **Description** |
48+
|---------------------------|--------------------------------------------------------|
49+
| `--kubeconfig` \<string\> | Path to the `kube.config` file to use for CLI requests |
50+
| `--quiet` | Suppress printing error messages |
51+
52+
## Example
53+
54+
### Repair a drive
55+
56+
The following begins a repair operation on the specified drive.
57+
58+
```sh {.copy}
59+
kubectl directpv repair 3b562992-f752-4a41-8be4-4e688ae8cd4c
60+
```

content/resource-management/drives.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,24 @@ $ kubectl directpv remove --drives=vdb --nodes=node1
161161

162162
Refer to the [remove command]({{< relref "command-line/remove.md" >}}) for more information.
163163

164+
## Repair Drives
165+
166+
{{< admonition title="Data Loss" type="caution" >}}
167+
THIS IS A DANGEROUS OPERATION WHICH LEADS TO DATA LOSS.
168+
{{< /admonition >}}
169+
170+
DirectPV supports using the `xfs_repair` utility to attempt to repair managed drives that report XFS filesystem errors, faults, or corruption.
171+
This command has no guarantee of success or complete recovery.
172+
173+
The `repair` command creates a one-time Kubernetes `Job` with the pod name `repair-<DRIVE-ID>`.
174+
Kubernetes automatically removes this job five minutes after completion.
175+
176+
You can view the progress and status of the drive repair using the `kubectl logs` command.
177+
178+
Before beginning a repair, you must first [suspend the drive](#suspend-drives).
179+
180+
Use the [repair.sh]({{< relref "/resource-management/scripts.md#repair.sh" >}}) script to repair a faulty drive.
181+
164182

165183
## Suspend drives
166184

0 commit comments

Comments
 (0)