Skip to content

Commit

Permalink
Merge branch 'main' into optional-timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
igooch authored Dec 6, 2024
2 parents 5cad439 + 171def9 commit 0f6899b
Show file tree
Hide file tree
Showing 19 changed files with 521 additions and 224 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ bin
/site/public
/test

# Allow upgrade test directory
!/test/upgrade

# Created by .ignore support plugin (hsz.mobi)
### Go template
# Binaries for programs and plugins
Expand Down
169 changes: 163 additions & 6 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,18 @@ steps:
# End to end tests
#

# wait for us to be the oldest ongoing build before we run e2es
- name: gcr.io/cloud-builders/gcloud
id: e2e-wait-to-become-leader
# Build and Push upgrade test
- name: make-docker
id: push-upgrade-test
dir: test/upgrade
env: ['REGISTRY=${_REGISTRY}']
args: [push]
waitFor:
- push-images

# Wait for us to be the oldest ongoing build before we run upgrade and e2e tests
- name: gcr.io/google.com/cloudsdktool/cloud-sdk
id: wait-to-become-leader
waitFor: [push-images]
script: |
#!/usr/bin/env bash
Expand All @@ -258,10 +267,157 @@ steps:
- BUILD_ID=$BUILD_ID
- TRIGGER_NAME=$TRIGGER_NAME

# Run the upgrade tests in parallel; fail this step if any of the tests fail
- name: gcr.io/google.com/cloudsdktool/cloud-sdk
id: submit-upgrade-test-cloud-build
dir: test/upgrade
entrypoint: bash
args:
- -c
- |
#!/usr/bin/env bash
# Applies the upgrade-test-runner job to every upgrade test cluster (each cloud
# product x Kubernetes version), backgrounds a `kubectl wait` per cluster, and
# then checks every result. Exits non-zero if any job did not complete
# successfully or if any `kubectl wait` fails.
set -e
set -o pipefail
export KUBECONFIG="/root/.kube/config"
mkdir -p /go/src/agones.dev/ /root/.kube/
ln -s /workspace /go/src/agones.dev/agones
cd /go/src/agones.dev/agones/test/upgrade
pids=()
typeset -A waitPids # Associative array for mapping `kubectl wait job` pid -> `kubectl wait job` output log name
tmpdir=$(mktemp -d)
# Kill all currently running child processes (the backgrounded `kubectl wait` pipelines).
kill_children() {
  echo Cleaning up any remaining running pids: $(jobs -p)
  kill $(jobs -p) 2> /dev/null || :
}
# A later `trap` on the same signal replaces the earlier one, so all exit-time
# cleanup lives in a single EXIT handler: stop the children first, then remove
# the temp dir they write to.
trap 'kill_children ; rm -rf -- "$tmpdir"' EXIT
# On SIGTERM only stop the children. The script keeps running after the handler
# and still reads the log files in $tmpdir; the EXIT handler removes it later.
trap kill_children SIGTERM
# Update image tags to include the current build version.
DevVersion="${_BASE_VERSION}-dev-$(git rev-parse --short=7 HEAD)"
export DevVersion
sed "s/\${DevVersion}/${DevVersion}/" upgradeTest.yaml > "${tmpdir}"/upgradeTest.yaml
sed "s/\${DevVersion}/${DevVersion}/" versionMap.yaml > "${tmpdir}"/versionMap.yaml
cloudProducts=("generic" "gke-autopilot")
# Kubernetes minor version -> region of the test cluster running that version.
declare -A versionsAndRegions=( [1.31]=us-east1 [1.30]=us-central1 [1.29]=us-west1 )
for cloudProduct in "${cloudProducts[@]}"
do
  for version in "${!versionsAndRegions[@]}"
  do
    region=${versionsAndRegions[$version]}
    # Cluster names embed the version with dots replaced by dashes, e.g. 1.31 -> 1-31.
    if [ "$cloudProduct" = generic ]
    then
      testCluster="standard-upgrade-test-cluster-${version//./-}"
    else
      testCluster="gke-autopilot-upgrade-test-cluster-${version//./-}"
    fi
    testClusterLocation="${region}"
    gcloud container clusters get-credentials "$testCluster" --region="$testClusterLocation" --project="$PROJECT_ID"
    if [ "$cloudProduct" = gke-autopilot ] ; then
      # For autopilot clusters use evictable "balloon" pods to keep a buffer in node pool autoscaling.
      kubectl apply -f evictablePods.yaml
    fi
    # Clean up any existing job / namespace / apiservice from previous run
    echo Checking if resources from a previous build of upgrade-test-runner exist and need to be cleaned up on cluster "${testCluster}".
    if kubectl get jobs | grep upgrade-test-runner ; then
      echo Deleting job from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete job upgrade-test-runner
      kubectl wait --for=delete pod -l job-name=upgrade-test-runner --timeout=5m
    fi
    # Check if there are any dangling game servers.
    if kubectl get gs | grep ".*"; then
      # Remove any finalizers so that dangling game servers can be manually deleted.
      kubectl get gs -o=custom-columns=:.metadata.name --no-headers | xargs kubectl patch gs -p '{"metadata":{"finalizers":[]}}' --type=merge
      sleep 5
      echo Deleting game servers from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete gs -l app=sdk-client-test
    fi
    if kubectl get po -l app=sdk-client-test | grep ".*"; then
      echo Deleting pods from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete po -l app=sdk-client-test
      kubectl wait --for=delete pod -l app=sdk-client-test --timeout=5m
    fi
    # The v1.allocation.agones.dev apiservice does not get removed automatically and will prevent the namespace from terminating.
    if kubectl get apiservice | grep v1.allocation.agones.dev ; then
      echo Deleting v1.allocation.agones.dev from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete apiservice v1.allocation.agones.dev
    fi
    if kubectl get namespace | grep agones-system ; then
      echo Deleting agones-system namespace from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete namespace agones-system
      kubectl wait --for=delete ns agones-system --timeout=5m
    fi
    if kubectl get crds | grep agones ; then
      echo Deleting crds from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl get crds -o=custom-columns=:.metadata.name | grep agones | xargs kubectl delete crd
    fi
    echo kubectl apply -f permissions.yaml on cluster "${testCluster}"
    kubectl apply -f permissions.yaml
    echo kubectl apply -f versionMap.yaml on cluster "${testCluster}"
    kubectl apply -f "${tmpdir}"/versionMap.yaml
    echo kubectl apply -f gameserverTemplate.yaml on cluster "${testCluster}"
    kubectl apply -f gameserverTemplate.yaml
    echo kubectl apply -f upgradeTest.yaml on cluster "${testCluster}"
    kubectl apply -f "${tmpdir}"/upgradeTest.yaml
    # We need to wait for job pod to be created and ready before we can wait on the job itself.
    # TODO: Once all test clusters are at Kubernetes Version >= 1.31 use `kubectl wait --for=create` instead of sleep.
    # kubectl wait --for=create pod -l job-name=upgrade-test-runner --timeout=1m
    sleep 10s
    kubectl wait --for=condition=ready pod -l job-name=upgrade-test-runner --timeout=5m
    echo Wait for job upgrade-test-runner to complete or fail on cluster "${testCluster}"
    # Backgrounded so the remaining clusters can be set up while this job runs;
    # the condition type that became True is captured in a per-cluster log file.
    kubectl wait job/upgrade-test-runner --timeout=20m --for jsonpath='{.status.conditions[*].status}'=True -o jsonpath='{.status.conditions[*].type}' | tee "${tmpdir}"/"${testCluster}".log &
    waitPid=$!
    pids+=( "$waitPid" )
    waitPids[$waitPid]="${tmpdir}"/"${testCluster}".log
  done
done
for pid in "${pids[@]}"; do
  # This block executes when the process exits and pid status==0
  if wait "$pid"; then
    outputLog="${waitPids[$pid]}"
    # wait for output to finish writing to file
    until [ -s "$outputLog" ]; do sleep 1; done
    output=$(<"${outputLog}")
    echo "${outputLog}": "${output}"
    # "Complete" is successful job run.
    # Version 1.31 has "SuccessCriteriaMet" as the first completion status returned, or "FailureTarget" in case of failure.
    if [ "$output" == "Complete" ] || [ "$output" == "SuccessCriteriaMet" ] ; then
      continue
    else
      exit 1
    fi
  # This block executes when the process exits and pid status!=0
  else
    status=$?
    outputLog="${waitPids[$pid]}"
    echo "One of the upgrade tests pid $pid from cluster log $outputLog exited with a non-zero status ${status}."
    exit "$status"
  fi
done
echo "End of Upgrade Tests"
waitFor:
- wait-to-become-leader
- push-upgrade-test

# Cancel all the orphaned e2e test cloud builds; failing to cancel any of the builds will fail this whole build
- name: gcr.io/cloud-builders/gcloud
id: cancel-orphan-e2e-tests
waitFor: [e2e-wait-to-become-leader]
waitFor: [wait-to-become-leader]
script: |
#!/usr/bin/env bash
until gcloud builds list --ongoing --filter "tags:'e2e-test'" --format="value(id)" | xargs --no-run-if-empty gcloud builds cancel
Expand Down Expand Up @@ -386,7 +542,7 @@ steps:
#
- name: gcr.io/cloud-builders/gcloud
id: cleanup-services
waitFor: [e2e-wait-to-become-leader]
waitFor: [wait-to-become-leader]
allowFailure: true
entrypoint: bash
args:
Expand All @@ -400,14 +556,15 @@ steps:
done
substitutions:
_BASE_VERSION: 1.46.0
_CACHE_BUCKET: agones-build-cache
_HTMLTEST_CACHE_KEY: htmltest-0.10.1
_CPP_SDK_BUILD_CACHE_KEY: cpp-sdk-build
_CPP_SDK_CONFORMANCE_CACHE_KEY: cpp-sdk-conformance
_RUST_SDK_BUILD_CACHE_KEY: rust-sdk-build
_REGISTRY: us-docker.pkg.dev/${PROJECT_ID}/ci
tags: [ci, 'commit-${COMMIT_SHA}']
timeout: 18000s # 5h: 3h (e2e-wait-to-become-leader) + 1.5h (e2e timeout) + 0.5h (everything else)
timeout: 18000s # 5h: 3h (wait-to-become-leader) + 1.5h (e2e timeout) + 0.5h (everything else)
queueTtl: 259200s # 72h
images:
- ${_REGISTRY}/agones-controller
Expand Down
2 changes: 1 addition & 1 deletion docs/governance/templates/release_issue.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ and copy it into a release issue. Fill in relevant values, found inside {}
- [ ] Run `make post-build-release` to build the artifacts in GCS(These files will be attached in the release notes) and to push the latest images in the release repository and push chart on agones-chart.
- [ ] Run `make shell` and run `gcloud config configurations activate <your development project>` to switch Agones
development tooling off of the `agones-images` project.
- [ ] Smoke Test: run `make install-release` to view helm releases, uninstall agones-system namesapce, fetch the latest version of Agones, verify the new version, installing agones-system namespace, and list all the pods of agones-system.
- [ ] Smoke Test: run `make install-release` to view helm releases, uninstall agones-system namespace, fetch the latest version of Agones, verify the new version, installing agones-system namespace, and list all the pods of agones-system.
- [ ] Attach all assets found in the cloud storage with {version} to the draft GitHub Release.
- [ ] Copy any review changes from the release blog post into the draft GitHub release.
- [ ] Publish the draft GitHub Release.
Expand Down
2 changes: 1 addition & 1 deletion examples/allocation-endpoint/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ helm upgrade my-release --install --namespace agones-system --create-namespace a
--set agones.allocator.service.http.enabled=false
```

After installing Agones, deploy [ESP](https://cloud.google.com/endpoints/docs/grpc/specify-esp-v2-startup-options) which is an envoy based proxy, deployed as a sidecar along side `agones-alloator` container. Run the following to patch the service deployement, change the service port to ESP and add annotation to `agones-allocator` service account to impersonate GCP service account.
After installing Agones, deploy [ESP](https://cloud.google.com/endpoints/docs/grpc/specify-esp-v2-startup-options), which is an Envoy-based proxy, deployed as a sidecar alongside the `agones-allocator` container. Run the following to patch the service deployment, change the service port to ESP, and add an annotation to the `agones-allocator` service account to impersonate the GCP service account.

Replace [GKE-PROJECT-ID] in `patch-agones-allocator.yaml` with your project ID before running the scripts.

Expand Down
2 changes: 1 addition & 1 deletion examples/allocator-client-csharp/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Program
static async Task Main(string[] args)
{
if (args.Length < 6) {
throw new Exception("Arguments are missing. Expecting: <private key> <public key> <server CA> <external IP> <namepace> <enable multi-cluster>");
throw new Exception("Arguments are missing. Expecting: <private key> <public key> <server CA> <external IP> <namespace> <enable multi-cluster>");
}

string clientKey = File.ReadAllText(args[0]);
Expand Down
4 changes: 2 additions & 2 deletions examples/simple-game-server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ There are some text commands you can send the server to affect its behavior:
| "LIST_CONTAINS" | Returns true if the given value is in the given List, false otherwise |
| "GET_LIST_LENGTH" | Returns the length (number of values) of the given List as a string |
| "GET_LIST_VALUES" | Returns the values in the given List as a comma delimited string |
| "APPEND_LIST_VALUE" | Returns if the given value was successfuly added to the List (true) or not (false) |
| "DELETE_LIST_VALUE" | Rreturns if the given value was successfuly deleted from the List (true) or not (false) |
| "APPEND_LIST_VALUE" | Returns if the given value was successfully added to the List (true) or not (false) |
| "DELETE_LIST_VALUE" | Returns if the given value was successfully deleted from the List (true) or not (false) |

## Configuration

Expand Down
6 changes: 3 additions & 3 deletions examples/simple-game-server/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ func handlePlayerConnected(s *sdk.SDK, parts []string, _ ...context.CancelFunc)
return
}

// handleGetPlayers returns a comma delimeted list of connected players
// handleGetPlayers returns a comma delimited list of connected players
func handleGetPlayers(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (response string, addACK bool, responseError error) {
log.Print("Retrieving connected player list")
list, err := s.Alpha().GetConnectedPlayers()
Expand Down Expand Up @@ -535,7 +535,7 @@ func handleGetListValues(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (r
return
}

// handleAppendListValue returns if the given value was successfuly added to the List or not
// handleAppendListValue returns if the given value was successfully added to the List or not
func handleAppendListValue(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (response string, addACK bool, responseError error) {
if len(parts) < 3 {
response = "Invalid APPEND_LIST_VALUE, should have 2 arguments"
Expand All @@ -553,7 +553,7 @@ func handleAppendListValue(s *sdk.SDK, parts []string, _ ...context.CancelFunc)
return
}

// handleDeleteListValue returns if the given value was successfuly deleted from the List or not
// handleDeleteListValue returns if the given value was successfully deleted from the List or not
func handleDeleteListValue(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (response string, addACK bool, responseError error) {
if len(parts) < 3 {
response = "Invalid DELETE_LIST_VALUE, should have 2 arguments"
Expand Down
4 changes: 2 additions & 2 deletions examples/simple-genai-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ type Message struct {
func handleGenAIRequest(prompt string, clientConn *connection, chatHistory []Message) (string, error) {
var jsonStr []byte
var err error
// If the endpoint is the NPC API, use the json request format specifc to that API
// If the endpoint is the NPC API, use the json request format specific to that API
if clientConn.npc {
npcRequest := NPCRequest{
Msg: prompt,
Expand Down Expand Up @@ -329,7 +329,7 @@ func autonomousChat(prompt string, conn1 *connection, conn2 *connection, numChat
autonomousChat(response, conn2, conn1, numChats, stopPhase, chatHistory)
}

// Manually interact via TCP with the GenAI endpont
// Manually interact via TCP with the GenAI endpoint
func tcpListener(port string, genAiConn *connection) {
log.Printf("Starting TCP server, listening on port %s", port)
ln, err := net.Listen("tcp", ":"+port)
Expand Down
14 changes: 9 additions & 5 deletions test/e2e/fleetautoscaler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1621,9 +1621,8 @@ func TestScheduleAutoscaler(t *testing.T) {
stable := framework.AgonesClient.AgonesV1()
fleets := stable.Fleets(framework.Namespace)
flt, err := fleets.Create(ctx, defaultFleet(framework.Namespace), metav1.CreateOptions{})
if assert.NoError(t, err) {
defer fleets.Delete(context.Background(), flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
}
require.NoError(t, err)
defer fleets.Delete(context.Background(), flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck

framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas))

Expand All @@ -1633,7 +1632,7 @@ func TestScheduleAutoscaler(t *testing.T) {
scheduleAutoscaler := defaultAutoscalerSchedule(t, flt)
scheduleAutoscaler.Spec.Policy.Schedule.ActivePeriod.StartCron = nextCronMinute(time.Now())
fas, err := fleetautoscalers.Create(ctx, scheduleAutoscaler, metav1.CreateOptions{})
assert.NoError(t, err)
require.NoError(t, err)

framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(5))
fleetautoscalers.Delete(ctx, fas.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
Expand All @@ -1646,7 +1645,7 @@ func TestScheduleAutoscaler(t *testing.T) {
scheduleAutoscaler = defaultAutoscalerSchedule(t, flt)
scheduleAutoscaler.Spec.Policy.Schedule.ActivePeriod.StartCron = nextCronMinuteBetween(time.Now())
fas, err = fleetautoscalers.Create(ctx, scheduleAutoscaler, metav1.CreateOptions{})
assert.NoError(t, err)
require.NoError(t, err)

framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(5))
fleetautoscalers.Delete(ctx, fas.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
Expand Down Expand Up @@ -1842,8 +1841,13 @@ func nextCronMinute(currentTime time.Time) string {
// nextCronMinuteBetween returns a cron expression covering the two minutes
// that follow currentTime.
// e.g. if the current time is 12:00, this method will return "1-2 * * * *",
// meaning between 12:01 - 12:02.
// When the next minute is 59 (i.e. the current minute is 58) the range would
// be "59-0", which is invalid, so "0-1 * * * *" is returned instead and the
// e2e tests wait a bit longer.
func nextCronMinuteBetween(currentTime time.Time) string {
	start := currentTime.Add(time.Minute).Minute()
	if start != 59 {
		end := currentTime.Add(2 * time.Minute).Minute()
		return fmt.Sprintf("%d-%d * * * *", start, end)
	}

	// A range may not wrap past the top of the hour; fall back to the first
	// two minutes of the following hour.
	return "0-1 * * * *"
}
Expand Down
2 changes: 1 addition & 1 deletion test/sdk/go/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ project_path := $(dir $(mkfile_path))
root_path = $(realpath $(project_path)/)
# Because go mod init in the Dockerfile installs the most recently released version of Agones, this
# will need to be built and pushed post-release. During DEV it will be built at DEV - 1.
release_version = 1.44.0
release_version = 1.45.0
server_tag := $(REGISTRY)/sdk-client-test:$(release_version)

# _____ _
Expand Down
Loading

0 comments on commit 0f6899b

Please sign in to comment.