Skip to content

Commit 9b2fd08

Browse files
committed
HPCC-33146 Add backoff if vault authentication fails and check other vaults
Signed-off-by: Gavin Halliday <[email protected]>
1 parent 322cb61 commit 9b2fd08

File tree

3 files changed

+72
-36
lines changed

3 files changed

+72
-36
lines changed

helm/hpcc/templates/_helpers.tpl

+3
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,9 @@ vaults:
686686
{{- if (hasKey $vault "writeTimeout") }}
687687
writeTimeout: {{ $vault.writeTimeout }}
688688
{{- end }}
689+
{{- if (hasKey $vault "backoffTimeout") }}
690+
backoffTimeout: {{ $vault.backoffTimeout }}
691+
{{- end }}
689692
{{- end -}}
690693
{{- end -}}
691694
{{- end -}}

helm/hpcc/values.schema.json

+4
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,10 @@
925925
"writeTimeout": {
926926
"description": "optional timeout (in ms) for socket writing to vault",
927927
"type": "number"
928+
},
929+
"backoffTimeout": {
930+
"description": "optional time (in ms) to back-off connecting to a vault that cannot be reached, or fails to authenticate",
931+
"type": "number"
928932
}
929933
},
930934
"required": [ "name", "url" ],

system/jlib/jsecrets.cpp

+65-36
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,8 @@ class CVault
606606
bool verify_server = true;
607607
unsigned retries = 3;
608608
unsigned retryWait = 1000;
609+
timestamp_type backoffTimeoutUs = 0; // Default to no backoff
610+
std::atomic<timestamp_type> backoffFailureTs{0};
609611
timeval connectTimeout = {0, 0};
610612
timeval readTimeout = {0, 0};
611613
timeval writeTimeout = {0, 0};
@@ -648,6 +650,9 @@ class CVault
648650
retries = (unsigned) vault->getPropInt("@retries", retries);
649651
retryWait = (unsigned) vault->getPropInt("@retryWait", retryWait);
650652

653+
if (vault->hasProp("@backoffTimeout"))
654+
backoffTimeoutUs = vault->getPropInt64("@backoffTimeout") * 1000ULL;
655+
651656
setTimevalMS(connectTimeout, (time_t) vault->getPropInt("@connectTimeout"));
652657
setTimevalMS(readTimeout, (time_t) vault->getPropInt("@readTimeout"));
653658
setTimevalMS(writeTimeout, (time_t) vault->getPropInt("@writeTimeout"));
@@ -894,55 +899,79 @@ class CVault
894899
}
895900
bool requestSecretAtLocation(CVaultKind &rkind, StringBuffer &content, const char *location, const char *secretCacheKey, const char *version, bool permissionDenied)
896901
{
897-
checkAuthentication(permissionDenied);
898-
if (isEmptyString(location))
902+
//If a previous request failed, then do not retry until backoffTimeoutUs has passed.
903+
if (backoffFailureTs)
899904
{
900-
OERRLOG("Vault %s cannot get secret at location without a location", name.str());
901-
return false;
902-
}
903-
904-
httplib::Client cli(schemeHostPort.str());
905-
httplib::Headers headers = {
906-
{ "X-Vault-Token", clientToken.str() }
907-
};
905+
if (getTimeStampNowValue() - backoffFailureTs < backoffTimeoutUs)
906+
return false;
908907

909-
unsigned numRetries = 0;
910-
initClient(cli, headers, numRetries);
911-
httplib::Result res = cli.Get(location, headers);
912-
while (!res && numRetries--)
913-
{
914-
OERRLOG("Retrying vault %s get secret, communication error %d location %s", name.str(), res.error(), location);
915-
if (retryWait)
916-
Sleep(retryWait);
917-
res = cli.Get(location, headers);
908+
//Clear the last failure - to avoid the check on the timestamp.
909+
backoffFailureTs = 0;
918910
}
919911

920-
if (res)
912+
bool backoff = false;
913+
try
921914
{
922-
if (res->status == 200)
915+
checkAuthentication(permissionDenied);
916+
if (isEmptyString(location))
923917
{
924-
rkind = kind;
925-
content.append(res->body.c_str());
926-
return true;
918+
OERRLOG("Vault %s cannot get secret at location without a location", name.str());
919+
return false;
927920
}
928-
else if (res->status == 403)
921+
922+
httplib::Client cli(schemeHostPort.str());
923+
httplib::Headers headers = {
924+
{ "X-Vault-Token", clientToken.str() }
925+
};
926+
927+
unsigned numRetries = 0;
928+
initClient(cli, headers, numRetries);
929+
httplib::Result res = cli.Get(location, headers);
930+
while (!res && numRetries--)
929931
{
930-
//try again forcing relogin, but only once. Just in case the token was invalidated but hasn't passed expiration time (for example max usage count exceeded).
931-
if (permissionDenied==false)
932-
return requestSecretAtLocation(rkind, content, location, secretCacheKey, version, true);
933-
OERRLOG("Vault %s permission denied accessing secret (check namespace=%s?) %s.%s location %s [%d](%d) - response: %s", name.str(), vaultNamespace.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
932+
OERRLOG("Retrying vault %s get secret, communication error %d location %s", name.str(), res.error(), location);
933+
if (retryWait)
934+
Sleep(retryWait);
935+
res = cli.Get(location, headers);
934936
}
935-
else if (res->status == 404)
937+
938+
if (res)
936939
{
937-
OERRLOG("Vault %s secret not found %s.%s location %s", name.str(), secretCacheKey, version ? version : "", location);
940+
if (res->status == 200)
941+
{
942+
rkind = kind;
943+
content.append(res->body.c_str());
944+
return true;
945+
}
946+
else if (res->status == 403)
947+
{
948+
//try again forcing relogin, but only once. Just in case the token was invalidated but hasn't passed expiration time (for example max usage count exceeded).
949+
if (permissionDenied==false)
950+
return requestSecretAtLocation(rkind, content, location, secretCacheKey, version, true);
951+
OERRLOG("Vault %s permission denied accessing secret (check namespace=%s?) %s.%s location %s [%d](%d) - response: %s", name.str(), vaultNamespace.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
952+
}
953+
else if (res->status == 404)
954+
{
955+
OERRLOG("Vault %s secret not found %s.%s location %s", name.str(), secretCacheKey, version ? version : "", location);
956+
}
957+
else
958+
{
959+
OERRLOG("Vault %s error accessing secret %s.%s location %s [%d](%d) - response: %s", name.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
960+
}
938961
}
939962
else
940-
{
941-
OERRLOG("Vault %s error accessing secret %s.%s location %s [%d](%d) - response: %s", name.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
942-
}
963+
OERRLOG("Error: Vault %s http error (%d) accessing secret %s.%s location %s", name.str(), res.error(), secretCacheKey, version ? version : "", location);
943964
}
944-
else
945-
OERRLOG("Error: Vault %s http error (%d) accessing secret %s.%s location %s", name.str(), res.error(), secretCacheKey, version ? version : "", location);
965+
catch (IException * e)
966+
{
967+
backoff = true;
968+
OERRLOG(e);
969+
e->Release();
970+
}
971+
972+
//If authentication failed (or another serious error), then record when that failure occurred
973+
if (backoff && backoffTimeoutUs)
974+
backoffFailureTs = getTimeStampNowValue();
946975

947976
return false;
948977
}

0 commit comments

Comments
 (0)