-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmissing-repl-replacements
executable file
·169 lines (136 loc) · 4.29 KB
/
missing-repl-replacements
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env bash
#
# Usage:
# missing-repl-replacements
#
# Given a set of data objects that are missing their replicas, it looks for
# other data objects that have replicas with the same checksum. It reads the set
# of data objects from stdin, one per line. Each entry is expected to be an
# absolute, logical path. The candidate list is written to stdout, one candidate
# per line. A candidate is a tab-separated list having the following form.
#
# <data obj> <bad resc> <bad repl> <candidate resc> <candidate repl>
#
# Here <data obj> is the absolute, logical path to the data object read from
# stdin; <bad resc> is the name of the storage resource hosting the bad replica;
# <bad repl> is the absolute, physical path to the bad replica; <candidate resc>
# is the name of the storage resource hosting a candidate replica; and
# <candidate repl> is the absolute, physical path to a replica that has the same
# checksum as the bad replica.
# Strict mode: abort on an unhandled failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Directory containing this script. The `quest` helper is run from here.
EXEC_DIR="$(dirname "$(realpath --canonicalize-existing "$0")")"
declare -r EXEC_DIR

# Maximum number of checksums placed in one `in (...)` query list.
declare -r CHKSUM_CHUNK_LEN=500

# ObjForChksum: checksum         -> logical path of the data object
# RescOf:       logical obj path -> resource holding its replica
# ReplOf:       logical obj path -> physical path of its replica
declare -A ObjForChksum
declare -A RescOf
declare -A ReplOf
# Reads absolute logical data object paths from stdin, one per line, and writes
# one tab-separated replacement candidate per data object to stdout. Progress
# messages go to stderr.
#
# Globals:
#   ObjForChksum, RescOf, ReplOf (filled through map_checksum, then read)
# Outputs:
#   <data obj> <bad resc> <bad repl> <candidate resc> <candidate repl>
main() {
  printf 'Looking up checkums\n' >&2
  local obj
  # The `|| [[ -n ... ]]` keeps a final entry that has no trailing newline;
  # a plain `read` loop would silently drop it.
  while IFS= read -r obj || [[ -n "$obj" ]]; do
    # A blank line names no data object, so there is nothing to look up.
    [[ -n "$obj" ]] || continue
    map_checksum "$obj"
  done

  printf 'Finding copies\n' >&2
  local sameFiles
  sameFiles="$(find_copies)"

  printf 'Finding self candidates\n' >&2
  local selfCandidates
  selfCandidates="$(find_self_candidates)"

  # Every replica sharing a checksum, minus the needy replicas themselves.
  printf 'Eliminating selves from candidates\n' >&2
  local candidates
  readarray -t candidates \
    <<< "$(comm -23 <(sort <<< "$sameFiles") <(sort <<< "$selfCandidates"))"

  printf 'Resolving replicas\n' >&2
  if [[ -z "${candidates[*]}" ]]; then
    return 0
  fi

  local needyResc
  local selectedResc=''
  local selectedRepl=''
  local lastObj=''
  local candidate chksum resc repl
  for candidate in "${candidates[@]}"; do
    IFS=$'\t' read -r chksum resc repl <<< "$candidate"
    obj="${ObjForChksum[$chksum]}"
    needyResc="${RescOf[$obj]}"

    # The candidates are sorted, so all rows for one checksum are adjacent.
    # A change of data object means the previous one is fully resolved.
    if [[ "$obj" != "$lastObj" && -n "$lastObj" ]]; then
      show_selected_mapping \
        "$lastObj" \
        "${RescOf[$lastObj]}" \
        "${ReplOf[$lastObj]}" \
        "$selectedResc" \
        "$selectedRepl"
      selectedResc=''
      selectedRepl=''
    fi

    if select_copy "$needyResc" "$selectedResc" "$resc"; then
      selectedResc="$resc"
      selectedRepl="$repl"
    fi

    lastObj="$obj"
  done

  # Flush the selection made for the last data object.
  if [[ -n "$selectedResc" ]]; then
    show_selected_mapping \
      "$lastObj" \
      "${RescOf[$lastObj]}" \
      "${ReplOf[$lastObj]}" \
      "$selectedResc" \
      "$selectedRepl"
  fi
}
# Looks up every replica whose checksum is a key of ObjForChksum and writes
# what `quest` returns for each query to stdout. The checksums are queried in
# chunks of at most CHKSUM_CHUNK_LEN so that no single `in (...)` list grows
# without bound.
#
# Globals:
#   ObjForChksum (read), CHKSUM_CHUNK_LEN (read), EXEC_DIR (read)
# Outputs:
#   The output of "$EXEC_DIR"/quest for each chunk; presumably one
#   tab-separated <checksum> <resource> <physical path> row per replica.
find_copies() {
  local chksums=("${!ObjForChksum[@]}")
  local total=${#chksums[@]}
  local chunkStart chksumList query

  # The upper bound is exclusive. `seq 0 LEN total` would also yield `total`
  # whenever it is a multiple of LEN (including 0), producing an empty chunk
  # and a spurious query for the empty checksum.
  for ((chunkStart = 0; chunkStart < total; chunkStart += CHKSUM_CHUNK_LEN)); do
    chksumList="$(mk_query_list "${chksums[@]:chunkStart:CHKSUM_CHUNK_LEN}")"
    printf -v query \
      'select DATA_CHECKSUM, DATA_RESC_NAME, DATA_PATH where DATA_CHECKSUM in (%s)\n' \
      "$chksumList"
    "$EXEC_DIR"/quest $'%s\t%s\t%s' <<< "$query"
  done
}
# Writes one tab-separated <checksum> <resource> <physical path> row for each
# data object recorded in ObjForChksum, using the same three-column layout as
# the rows requested by find_copies.
#
# Globals:
#   ObjForChksum, RescOf, ReplOf (read)
find_self_candidates() {
  local sum path
  for sum in "${!ObjForChksum[@]}"; do
    path="${ObjForChksum[$sum]}"
    printf '%s\t%s\t%s\n' "$sum" "${RescOf[$path]}" "${ReplOf[$path]}"
  done
}
# Records the checksum, resource and physical path of one data object.
#
# Globals:
#   ObjForChksum, RescOf, ReplOf (written)
# Arguments:
#   $1 - absolute logical path of the data object
# Returns:
#   0 always; an object whose lookup lacks any of the three fields is skipped.
map_checksum() {
  local objPath="$1"

  local parent leaf
  parent="$(dirname "$objPath")"
  leaf="$(basename "$objPath")"

  # Only the first row of the lookup is consumed.
  local sum host file
  IFS=$'\t' read -r sum host file \
    <<< "$(retrieve_chksum_and_loc "$parent" "$leaf")" \
    || return 0

  [[ -n "$sum" && -n "$host" && -n "$file" ]] || return 0

  ObjForChksum[$sum]="$objPath"
  RescOf[$objPath]="$host"
  ReplOf[$objPath]="$file"
}
# Asks `quest` for the checksum, resource name and physical path of the data
# object with the given collection and name.
#
# Globals:
#   EXEC_DIR (read)
# Arguments:
#   $1 - absolute path of the parent collection
#   $2 - name of the data object within that collection
# Outputs:
#   Whatever "$EXEC_DIR"/quest writes for the query.
retrieve_chksum_and_loc() {
  local parentColl="$1"
  local objName="$2"

  # NOTE(review): both values are spliced into the query unescaped; a name
  # containing a single quote would break the query -- confirm how quest
  # expects such names to be handled.
  local query
  printf -v query \
    "select DATA_CHECKSUM, DATA_RESC_NAME, DATA_PATH\nwhere COLL_NAME = '%s' and DATA_NAME = '%s'" \
    "$parentColl" "$objName"

  "$EXEC_DIR"/quest $'%s\t%s\t%s' <<< "$query"
}
# Decides whether a candidate replica should replace the current selection.
#
# Arguments:
#   $1 - resource holding the replica that needs replacing
#   $2 - resource of the currently selected copy ('' when none is selected)
#   $3 - resource of the candidate copy
# Returns:
#   0 when the candidate should be selected, 1 otherwise.
select_copy() {
  local needyResc="$1"
  local curCopyResc="$2"
  local candidateResc="$3"

  case "$candidateResc" in
    "$needyResc")
      # A copy on the needy resource itself always wins.
      return 0
      ;;
    corral4)
      # A corral4 copy is taken only when nothing has been selected yet.
      if [[ -z "$curCopyResc" ]]; then
        return 0
      fi
      return 1
      ;;
  esac

  # Any other resource is accepted unless the current selection already
  # lives on the needy resource.
  if [[ "$curCopyResc" != "$needyResc" ]]; then
    return 0
  fi
  return 1
}
# Writes one candidate row to stdout as five tab-separated fields.
#
# Arguments:
#   $1 - logical path of the data object
#   $2 - resource holding the replica that needs replacing
#   $3 - physical path of the replica that needs replacing
#   $4 - resource holding the selected copy
#   $5 - physical path of the selected copy
show_selected_mapping() {
  local fields=("$1" "$2" "$3" "$4" "$5")

  # "${fields[*]}" joins on the first character of IFS, i.e. a tab here.
  local IFS=$'\t'
  printf '%s\n' "${fields[*]}"
}
# Turns its arguments into a comma-separated list of single-quoted values for
# use inside a query's `in (...)` clause.
#
# Arguments:
#   $@ - the values to list
# Outputs:
#   One line such as 'a','b','c'. With no arguments the line is ''.
mk_query_list() {
  local quoted=()
  local elmt
  for elmt in "$@"; do
    quoted+=("'${elmt}'")
  done

  # With no values, a single empty quoted string is emitted.
  if (( ${#quoted[@]} == 0 )); then
    quoted=("''")
  fi

  local IFS=,
  local joined="${quoted[*]}"

  # Any newline embedded in a value ends up as a comma, as it would when the
  # lines are joined serially.
  printf '%s\n' "${joined//$'\n'/,}"
}
# Entry point: run main, forwarding the command-line arguments.
main "$@"