Skip to content

Commit

Permalink
Fix HTTP2 error with openshift routes
Browse files Browse the repository at this point in the history
  • Loading branch information
sayan-biswas authored and gabemontero committed Aug 22, 2023
1 parent f47eabd commit ca52631
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 15 deletions.
12 changes: 10 additions & 2 deletions developer/openshift/dev_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,14 @@ check_cluster_role() {
fi
}

# Applies cluster-wide settings that the rest of the setup relies on.
# At present this only enables HTTP2 on the cluster ingress by annotating
# the `ingresses.config/cluster` resource.
# Globals:   none
# Arguments: none
# Outputs:   progress messages and the `oc` output on stdout, each piped
#            through the `indent` helper (defined outside this block).
# Returns:   status of the last pipeline; NOTE(review): an `oc` failure is
#            only propagated if the script runs with `pipefail` - confirm.
cluster_setup() {
  echo "[cluster-setup]"

  # By default HTTP2 is not enabled in test openshift clusters
  echo "- Enabling HTTP2 for ingress" | indent 2
  # --overwrite keeps this step idempotent: without it `oc annotate` rejects
  # the update when the annotation already exists with another value, which
  # would break a re-run of the setup on an already configured cluster.
  oc annotate ingresses.config/cluster \
    ingress.operator.openshift.io/default-enable-http2=true \
    --overwrite | indent 6
}

install_openshift_gitops() {
APP="openshift-gitops"

Expand Down Expand Up @@ -204,7 +212,6 @@ install_openshift_gitops() {
fi
}


setup_compute_access() {
kustomization_dir="$GIT_URL/operator/gitops/compute/pipeline-service-manager?ref=$GIT_REF"
"$PROJECT_DIR/operator/images/access-setup/content/bin/setup_compute.sh" \
Expand Down Expand Up @@ -269,9 +276,10 @@ EOF

main() {
parse_args "$@"
precheck_binary "curl" "argocd" "kubectl" "yq"
precheck_binary "curl" "argocd" "kubectl" "yq" "oc"
init
check_cluster_role
cluster_setup
echo "[compute-access]"
setup_compute_access
echo
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2400,7 +2400,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(up{job=\"tekton-results-api-service\"})",
"expr": "sum(up{job=\"tekton-results-api\"})",
"format": "time_series",
"hide": false,
"interval": "",
Expand Down Expand Up @@ -2481,7 +2481,7 @@
"targets": [
{
"exemplar": true,
"expr": "1 - ((sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code=~\"Internal|Unavailable|Unknown|Unimplemented\"}[$__rate_interval]))) / sum(rate(grpc_server_started_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])))\n",
"expr": "1 - ((sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code=~\"Internal|Unavailable|Unknown|Unimplemented\"}[$__rate_interval]))) / sum(rate(grpc_server_started_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])))\n",
"interval": "",
"intervalFactor": 4,
"legendFormat": "Success Rate",
Expand Down Expand Up @@ -2618,7 +2618,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(grpc_server_started_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval]))",
"expr": "sum(rate(grpc_server_started_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval]))",
"interval": "",
"intervalFactor": 4,
"legendFormat": "Request Rate",
Expand Down Expand Up @@ -2686,7 +2686,7 @@
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])) by (le) )",
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])) by (le) )",
"interval": "",
"intervalFactor": 4,
"legendFormat": "Latency",
Expand Down Expand Up @@ -3029,7 +3029,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])) by (grpc_method)",
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])) by (grpc_method)",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{grpc_method}}",
Expand Down Expand Up @@ -3084,7 +3084,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(increase(grpc_server_handling_seconds_bucket{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])) by (le)",
"expr": "sum(increase(grpc_server_handling_seconds_bucket{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\"}[$__rate_interval])) by (le)",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{le}}",
Expand Down Expand Up @@ -3211,15 +3211,15 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval])) by (grpc_method)",
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval])) by (grpc_method)",
"interval": "",
"intervalFactor": 4,
"legendFormat": "RPC: {{grpc_method}}",
"refId": "error methods"
},
{
"exemplar": true,
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code=~\"Internal|Unavailable|Unknown|Unimplemented\"}[$__rate_interval])) by (grpc_code) / ignoring(grpc_code) group_left sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval]))",
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code=~\"Internal|Unavailable|Unknown|Unimplemented\"}[$__rate_interval])) by (grpc_code) / ignoring(grpc_code) group_left sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval]))",
"hide": false,
"interval": "",
"intervalFactor": 4,
Expand Down Expand Up @@ -3326,15 +3326,15 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval])) by (grpc_method)",
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval])) by (grpc_method)",
"interval": "",
"intervalFactor": 4,
"legendFormat": "RPC: {{grpc_method}}",
"refId": "error methods"
},
{
"exemplar": true,
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!~\"Internal|Unavailable|Unknown|Unimplemented\"}[$__rate_interval])) by (grpc_code) / ignoring(grpc_code) group_left sum(rate(grpc_server_handled_total{job=\"tekton-results-api-service\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval]))",
"expr": "sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!~\"Internal|Unavailable|Unknown|Unimplemented\"}[$__rate_interval])) by (grpc_code) / ignoring(grpc_code) group_left sum(rate(grpc_server_handled_total{job=\"tekton-results-api\", grpc_service=~\"tekton.results.v1alpha2.*\", grpc_code!=\"OK\"}[$__rate_interval]))",
"hide": false,
"interval": "",
"intervalFactor": 4,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# OCP Bug workaround https://issues.redhat.com/browse/OCPBUGS-5916
# Dedicated Service that exposes only the "metrics" port of the pods labelled
# app.kubernetes.io/name=tekton-results-api in the tekton-results namespace.
# The additional app.kubernetes.io/component=metrics label allows a label
# selector to pick this Service specifically, rather than any other Service
# that carries the same app.kubernetes.io/name label.
---
apiVersion: v1
kind: Service
metadata:
name: tekton-results-api-metrics
namespace: tekton-results
labels:
app.kubernetes.io/name: "tekton-results-api"
# Marks this Service as the metrics endpoint.
app.kubernetes.io/component: "metrics"
spec:
# Routes traffic to the tekton-results-api pods.
selector:
app.kubernetes.io/name: "tekton-results-api"
ports:
- name: metrics
protocol: TCP
# Port exposed by the Service.
port: 9443
# Refers to the pod port by name instead of by number.
targetPort: metrics
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ resources:
- watcher-logging-rbac.yaml
- service-monitor.yaml
- watcher-rbac.yaml
- api-metrics-service.yaml

images:
- name: ko://github.com/tektoncd/results/cmd/api
Expand Down Expand Up @@ -50,9 +51,13 @@ patches:
kind: Service
name: tekton-results-watcher
labelSelector: "app.kubernetes.io/name=tekton-results-watcher"
- path: api-service-patch.yaml
target:
- target:
version: v1
kind: Service
name: tekton-results-api-service
labelSelector: "app.kubernetes.io/name=tekton-results-api"
# OCP Bug workaround https://issues.redhat.com/browse/OCPBUGS-5916
# path: api-service-patch.yaml
patch: |-
- op: remove
path: /spec/ports/1
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ metadata:
annotations:
argocd.argoproj.io/sync-wave: "0"
spec:
jobLabel: "app.kubernetes.io/name"
endpoints:
- path: /metrics
port: metrics
Expand All @@ -63,7 +64,8 @@ spec:
insecureSkipVerify: true
selector:
matchLabels:
app.kubernetes.io/name: tekton-results-api
app.kubernetes.io/name: "tekton-results-api"
app.kubernetes.io/component: "metrics"
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
Expand Down
10 changes: 10 additions & 0 deletions operator/test/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,16 @@ test_security() {
}

test_results() {
# Check logs for OCP bug https://issues.redhat.com/browse/OCPBUGS-5916
printf "\n - Check HTTP2 health probe errors: "
pattern="http2: server: error reading preface from client"
if kubectl logs deployment/tekton-results-api -c "api" -n "$NAMESPACE" 2>/dev/null | grep -ciq "$pattern"; then
echo "Failed"
exit 1
else
echo "OK"
fi

test_pipelines
echo -n " - Results in database:"

Expand Down

0 comments on commit ca52631

Please sign in to comment.