Skip to content

Commit 3d18b39

Browse files
committed
Enhancement: Ability to specify which bricks to read from in an EC setup
Allows a client to specify which bricks to read from in an erasure-coded setup. This is done by setting the extended attribute glusterfs.ec.readmask to a colon-separated list of brick numbers. Signed-off-by: sbk173 <[email protected]>
1 parent a9f3973 commit 3d18b39

File tree

8 files changed

+337
-46
lines changed

8 files changed

+337
-46
lines changed

tests/basic/ec/ec-inode-read-mask.t

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#!/bin/bash
2+
. $(dirname $0)/../../include.rc
3+
. $(dirname $0)/../../volume.rc
4+
. $(dirname $0)/../../ec.rc
5+
6+
EC_READMASK_XATTR="glusterfs.ec.readmask"
7+
8+
# Collect the per-brick READ hit counts from a `volume profile ... info --xml`
# dump.
#
# Arguments:
#   $1 - path of the XML file to scan
#   $2 - name of the caller's array; it is emptied and then receives one count
#        per <brickName> element, in document order
#
# The file is scanned line by line. Each <brickName> starts a new brick. Inside
# a <fop> element whose <name> is READ, the <hits> value is recorded. When a
# brick carries several READ entries the last one seen wins; a brick with no
# READ entry reports 0.
get_brick_reads() {
    local xml_file="$1"
    local -n result_array=$2    # nameref: writes land in the caller's array

    # Kept in a variable so the angle brackets need no escaping inside [[ ]].
    local hits_re='<hits>([0-9]+)</hits>'
    local line
    local index=0
    local brick_reads=0
    local inside_fop=0
    local read_hits=0
    local brick_found=0

    # Start from a clean slate so entries left over from an earlier call cannot
    # survive when the caller reuses the same array.
    result_array=()

    while IFS= read -r line; do
        if [[ $line == *"<brickName>"* ]]; then
            # A new brick begins: flush the count gathered for the previous one.
            if ((brick_found)); then
                result_array[index]=$brick_reads
                index=$((index + 1))
            fi
            brick_found=1
            brick_reads=0
        fi

        if [[ $line == *"<fop>"* ]]; then
            inside_fop=1
            read_hits=0
        fi

        if ((inside_fop)) && [[ $line == *"<name>READ</name>"* ]]; then
            read_hits=1
        fi

        # Only a well-formed numeric <hits> element of a READ fop is accepted;
        # anything else leaves the current count untouched.
        if ((inside_fop && read_hits)) && [[ $line =~ $hits_re ]]; then
            brick_reads=${BASH_REMATCH[1]}
        fi

        if [[ $line == *"</fop>"* ]]; then
            inside_fop=0
        fi
    done < "$xml_file"

    # The last brick has no following <brickName> to trigger the flush.
    if ((brick_found)); then
        result_array[index]=$brick_reads
    fi
}
53+
54+
# Check that two read-count arrays differ only at the permitted positions.
#
# Arguments:
#   $1    - name of the first array
#   $2    - name of the second array
#   $3... - indices that are allowed to differ; each argument may hold a single
#           index or several separated by whitespace
#
# Prints a one-line verdict. Returns 0 when both arrays have the same length
# and every differing index is permitted, 1 otherwise. When several indices
# changed unexpectedly, the first one in the first array's index order is
# reported. The permitted indices are not required to differ.
compare_arrays() {
    local -n arr1=$1
    local -n arr2=$2
    local -a indices=("${@:3}")

    if [[ ${#arr1[@]} -ne ${#arr2[@]} ]]; then
        echo "Array lengths differ"
        return 1
    fi

    # Flatten the index arguments into a lookup set, so both
    # `compare_arrays a b 0 1 3` and `compare_arrays a b "0 1 3"` are accepted.
    local -a flat=()
    read -r -a flat <<< "${indices[*]}"
    local -A allowed=()
    local token
    for token in "${flat[@]}"; do
        allowed[$token]=1
    done

    # Single pass in index order keeps the reported index deterministic.
    local i
    for i in "${!arr1[@]}"; do
        if [[ "${arr1[i]}" -ne "${arr2[i]}" && -z ${allowed[$i]:-} ]]; then
            echo "Unexpected change at index $i"
            return 1
        fi
    done

    echo "Only specified indices changed"
    return 0
}
88+
89+
# Read $M0/newfile through a read mask and check which bricks served READs.
#
# Arguments:
#   $1 - read mask as colon-separated brick indices, e.g. "0:1:3"
#
# Steps: snapshot the per-brick READ counts from the profile output, set the
# mask through the $EC_READMASK_XATTR xattr, read the file with dd
# (iflag=direct), snapshot the counts again, and let compare_arrays confirm that
# no brick outside the mask shows a different count.
#
# Relies on $CLI, $V0, $M0 and $tmpdir being set by the surrounding script.
# Returns 0 on success, non-zero if the xattr cannot be set, the read fails, or
# an unmasked brick changed.
validate_read() {
    local mask="$1"
    # Split "a:b:c" into separate words, one per brick index.
    local -a masked_bricks=(${mask//:/ })

    $CLI volume profile $V0 info --xml > "$tmpdir/preread.xml"
    local -a brick_reads_array_old
    get_brick_reads "$tmpdir/preread.xml" brick_reads_array_old
    echo "${brick_reads_array_old[*]}"

    # Restrict reads of the file to the bricks named in the mask.
    if ! setfattr -n "$EC_READMASK_XATTR" -v "$mask" "$M0/newfile"; then
        echo "Failed to set readmask xattr"
        return 1
    fi

    # A failed read would leave the counters meaningless, so stop here.
    if ! dd if="$M0/newfile" of=/dev/null iflag=direct bs=4M; then
        echo "Failed to read file with readmask set"
        return 1
    fi

    # NOTE(review): presumably gives the profile counters time to settle
    # before they are sampled again — confirm.
    sleep 1

    $CLI volume profile $V0 info --xml > "$tmpdir/after_mask.xml"
    local -a brick_reads_array_new
    get_brick_reads "$tmpdir/after_mask.xml" brick_reads_array_new
    echo "After readmask: ${brick_reads_array_new[*]}"

    compare_arrays brick_reads_array_new brick_reads_array_old "${masked_bricks[@]}"
}
116+
117+
# Setup: start glusterd and build a 10-brick disperse volume (redundancy 4).
cleanup
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info

TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
TEST $CLI volume create $V0 disperse 10 redundancy 4 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9}

EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Created' volinfo_field $V0 'Status'
EXPECT '10' brick_count $V0

TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'

# Mount FUSE with caching disabled
TEST $GFS -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0

# Profiling must be on: validate_read derives per-brick READ counts from it.
TEST $CLI volume profile $V0 start

# Create file
TEST dd if=/dev/urandom of=$M0/newfile bs=4M count=5

# Read without setting readmask xattr should not fail
TEST dd if=$M0/newfile of=/dev/null iflag=direct bs=4M

# Create temporary directory for the profile XML snapshots. Verify it exists:
# with an empty $tmpdir, validate_read would write to /preread.xml.
tmpdir=$(mktemp -d -t "${0##*/}.XXXXXX")
TEST [ -d "$tmpdir" ]

# Test 1: Read with mask set to bricks 0, 1, 3, 5, 8, 9
TEST validate_read "0:1:3:5:8:9"

# Test 2: Read with mask set to bricks 4, 5, 6, 7, 8, 9
TEST validate_read "4:5:6:7:8:9"

# Test 3: setfattr must reject read masks that contain non-numeric ids
TEST ! setfattr -n $EC_READMASK_XATTR -v "1:sm:snb:adi:as" $M0/newfile

TEST ! setfattr -n $EC_READMASK_XATTR -v "0:1::2ab" $M0/newfile

# Test 4: setfattr must reject read masks that name too few bricks
TEST ! setfattr -n $EC_READMASK_XATTR -v "0:1:2:3" $M0/newfile
TEST ! setfattr -n $EC_READMASK_XATTR -v "4:5" $M0/newfile

rm -rf "$tmpdir"
cleanup;

xlators/cluster/ec/src/ec-helpers.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,40 @@ ec_inode_get(inode_t *inode, xlator_t *xl)
734734
return ctx;
735735
}
736736

737+
void
738+
ec_inode_readmask_set(inode_t *inode, xlator_t *xl, uintptr_t read_mask)
739+
{
740+
ec_inode_t *ctx = NULL;
741+
742+
LOCK(&inode->lock);
743+
744+
ctx = __ec_inode_get(inode, xl);
745+
746+
if (ctx)
747+
ctx->read_mask = read_mask;
748+
749+
UNLOCK(&inode->lock);
750+
751+
}
752+
753+
uintptr_t
754+
ec_inode_readmask_get(inode_t *inode, xlator_t *xl)
755+
{
756+
ec_inode_t *ctx = NULL;
757+
uintptr_t read_mask = 0;
758+
759+
LOCK(&inode->lock);
760+
761+
ctx = __ec_inode_get(inode, xl);
762+
763+
if (ctx)
764+
read_mask = ctx->read_mask;
765+
766+
UNLOCK(&inode->lock);
767+
768+
return read_mask;
769+
}
770+
737771
ec_fd_t *
738772
__ec_fd_get(fd_t *fd, xlator_t *xl)
739773
{
@@ -834,3 +868,71 @@ ec_is_metadata_fop (int32_t lock_kind, glusterfs_fop_t fop)
834868
}
835869
return _gf_false;
836870
}*/
871+
872+
gf_boolean_t
873+
ec_is_readmask_xattr(dict_t *dict)
874+
{
875+
data_t *dict_data = NULL;
876+
if (dict_lookup(dict, EC_XATTR_READMASK, &dict_data) == 0){
877+
return _gf_true;
878+
}
879+
return _gf_false;
880+
}
881+
882+
uintptr_t
883+
ec_parse_read_mask(ec_t *ec, char *read_mask_str, uintptr_t *read_mask_ptr, int32_t *op_errno_ptr, uint64_t msgid)
884+
{
885+
char *mask = NULL;
886+
char *maskptr = NULL;
887+
char *saveptr = NULL;
888+
char *id_str = NULL;
889+
uintptr_t read_mask = 0;
890+
int id = 0;
891+
int ret = -1;
892+
893+
mask = gf_strdup(read_mask_str);
894+
if (!mask) {
895+
*op_errno_ptr = ENOMEM;
896+
goto out;
897+
}
898+
maskptr = mask;
899+
900+
for (;;) {
901+
id_str = strtok_r(maskptr, ":", &saveptr);
902+
if (id_str == NULL)
903+
break;
904+
if (gf_string2int(id_str, &id)) {
905+
gf_msg(ec->xl->name, GF_LOG_ERROR, 0, msgid,
906+
"In read-mask \"%s\" id %s is not a valid integer",
907+
read_mask_str, id_str);
908+
909+
*op_errno_ptr = EINVAL;
910+
goto out;
911+
}
912+
913+
if ((id < 0) || (id >= ec->nodes)) {
914+
gf_msg(ec->xl->name, GF_LOG_ERROR, 0, msgid,
915+
"In read-mask \"%s\" id %d is not in range [0 - %d]",
916+
read_mask_str, id, ec->nodes - 1);
917+
918+
*op_errno_ptr = EINVAL;
919+
goto out;
920+
}
921+
read_mask |= (1UL << id);
922+
maskptr = NULL;
923+
}
924+
925+
if (gf_bits_count(read_mask) < ec->fragments) {
926+
gf_msg(ec->xl->name, GF_LOG_ERROR, 0, msgid,
927+
"read-mask \"%s\" should contain at least %d ids", read_mask_str,
928+
ec->fragments);
929+
930+
*op_errno_ptr = EINVAL;
931+
goto out;
932+
}
933+
*read_mask_ptr = read_mask;
934+
ret = 0;
935+
out:
936+
GF_FREE(mask);
937+
return ret;
938+
}

xlators/cluster/ec/src/ec-helpers.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ __ec_fd_get(fd_t *fd, xlator_t *xl);
7070
ec_fd_t *
7171
ec_fd_get(fd_t *fd, xlator_t *xl);
7272

73+
void
74+
ec_inode_readmask_set(inode_t *inode, xlator_t *xl, uintptr_t read_mask);
75+
uintptr_t
76+
ec_inode_readmask_get(inode_t *inode, xlator_t *xl);
77+
7378
static inline uint32_t
7479
ec_adjust_size_down(ec_t *ec, uint64_t *value, gf_boolean_t scale)
7580
{
@@ -192,4 +197,10 @@ ec_filter_internal_xattrs(dict_t *xattr);
192197
int32_t
193198
ec_launch_replace_heal(ec_t *ec);
194199

200+
gf_boolean_t
201+
ec_is_readmask_xattr(dict_t *dict);
202+
203+
uintptr_t
204+
ec_parse_read_mask(ec_t * ec, char *read_mask_str, uintptr_t *read_mask_ptr, int32_t *op_errno_ptr, uint64_t msgid);
205+
195206
#endif /* __EC_HELPERS_H__ */

xlators/cluster/ec/src/ec-inode-read.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1372,9 +1372,14 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
13721372
return EC_STATE_DISPATCH;
13731373

13741374
case EC_STATE_DISPATCH:
1375-
if (ec->read_mask) {
1375+
uintptr_t inode_read_mask = ec_inode_readmask_get(fop->fd->inode, fop->xl);
1376+
if (inode_read_mask != 0) {
1377+
fop->mask &= inode_read_mask;
1378+
}
1379+
else if (ec->read_mask) {
13761380
fop->mask &= ec->read_mask;
13771381
}
1382+
13781383
ec_dispatch_min(fop);
13791384

13801385
return EC_STATE_PREPARE_ANSWER;

xlators/cluster/ec/src/ec-messages.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
5656
EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
5757
EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
5858
EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
59-
EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD);
59+
EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD, EC_MSG_INVALID_READMASK);
6060

6161
#endif /* !_EC_MESSAGES_H_ */

xlators/cluster/ec/src/ec-types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ struct _ec_inode {
187187
struct list_head heal;
188188
ec_stripe_list_t stripe_cache;
189189
uint64_t bad_version;
190+
uintptr_t read_mask;
190191
};
191192

192193
typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,

0 commit comments

Comments
 (0)