diff --git a/.cirrus.yml b/.cirrus.yml
index 9cba9792bb..120aeaa1dc 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -836,8 +836,8 @@ local_system_test_task: &local_system_test_task
(changesInclude('**/*.go', '**/*.c', '**/*.h') && !changesIncludeOnly('test/**', 'pkg/machine/e2e/**'))
depends_on: *build
matrix: *platform_axis
- gce_instance: *standardvm
- timeout_in: 35m
+ gce_instance: *fastvm
+ timeout_in: 25m
env:
TEST_FLAVOR: sys
clone_script: *get_gosrc
@@ -890,8 +890,8 @@ rootless_remote_system_test_task:
CTR_FQIN: ${FEDORA_CONTAINER_FQIN}
<<: *local_system_test_task
alias: rootless_remote_system_test
- gce_instance: *standardvm
- timeout_in: 35m
+ gce_instance: *fastvm
+ timeout_in: 25m
env:
TEST_FLAVOR: sys
PODBIN_NAME: remote
@@ -905,8 +905,8 @@ rootless_system_test_task:
only_if: *only_if_system_test
depends_on: *build
matrix: *platform_axis
- gce_instance: *standardvm
- timeout_in: 35m
+ gce_instance: *fastvm
+ timeout_in: 25m
env:
TEST_FLAVOR: sys
PRIV_NAME: rootless
diff --git a/Makefile b/Makefile
index 799ca84390..3cdae32101 100644
--- a/Makefile
+++ b/Makefile
@@ -688,7 +688,8 @@ localmachine:
localsystem:
# Wipe existing config, database, and cache: start with clean slate.
$(RM) -rf ${HOME}/.local/share/containers ${HOME}/.config/containers
- if timeout -v 1 true; then PODMAN=$(CURDIR)/bin/podman QUADLET=$(CURDIR)/bin/quadlet bats -T test/system/; else echo "Skipping $@: 'timeout -v' unavailable'"; fi
+ PODMAN=$(CURDIR)/bin/podman QUADLET=$(CURDIR)/bin/quadlet bats -T --filter-tags '!ci:parallel' test/system/
+ PODMAN=$(CURDIR)/bin/podman QUADLET=$(CURDIR)/bin/quadlet bats -T --filter-tags ci:parallel -j $$(nproc) test/system/
.PHONY: remotesystem
remotesystem:
@@ -717,7 +718,8 @@ remotesystem:
echo "Error: ./bin/podman system service did not come up" >&2;\
exit 1;\
fi;\
- env PODMAN="$(CURDIR)/bin/podman-remote" bats -T test/system/ ;\
+ env PODMAN="$(CURDIR)/bin/podman-remote" bats -T --filter-tags '!ci:parallel' test/system/ ;\
+ env PODMAN="$(CURDIR)/bin/podman-remote" bats -T --filter-tags ci:parallel -j $$(nproc) test/system/ ;\
rc=$$?;\
kill %1;\
else \
diff --git a/contrib/cirrus/logformatter b/contrib/cirrus/logformatter
index 78ac1850d7..ed7135e8d2 100755
--- a/contrib/cirrus/logformatter
+++ b/contrib/cirrus/logformatter
@@ -349,7 +349,7 @@ END_HTML
# BATS handling. This will recognize num_tests both at start and end
if ($line =~ /^1\.\.(\d+)$/) {
$looks_like_bats = 1;
- $bats_count{expected_total} = $1;
+ $bats_count{expected_total} += $1;
undef $looks_like_python;
}
# Since the number of tests can't always be predicted, recognize
@@ -445,9 +445,14 @@ END_HTML
if ($css) {
# Make it linkable, e.g. foo.html#t--00001
if ($line =~ /^(not\s+)?ok\s+(\d+)\s+(.*)/) {
- $line = sprintf("%s", $2, $line);
+ my ($notok, $num, $name) = ($1, $2, $3);
+ # Parallel system tests are shown as |nnn| instead of [nnn]
+ my $p = '';
+ $p = 'p' if $name =~ /^\s*\|\d+\|/;
+
+ $line = sprintf("%s", $num, $p, $line);
- push @{$bats_count{__fail_list}}, [ $2, $3 ] if $1;
+ push @{$bats_count{__fail_list}}, [ $num, $name ] if $notok;
}
$bats_count{$css}++;
$css = "bats-$css";
diff --git a/hack/bats b/hack/bats
index 3002c0199b..3184c95b76 100755
--- a/hack/bats
+++ b/hack/bats
@@ -83,7 +83,10 @@ for i;do
--rootless) TEST_ROOT= ;;
--remote) REMOTE=remote ;;
--ts|-T) bats_opts+=("-T") ;;
- --tag=*) bats_filter=("--filter-tags" "$value") ;;
+ --tag=*) bats_filter=("--filter-tags" "$value")
+ if [[ "$value" = "ci:parallel" ]]; then
+ bats_opts+=("--jobs" $(nproc))
+ fi;;
*/*.bats) TESTS=$i ;;
*)
if [[ $i =~ : ]]; then
@@ -130,7 +133,7 @@ export PODMAN_BATS_LEAK_CHECK=1
# Root
if [[ "$TEST_ROOT" ]]; then
- echo "# bats ${bats_filter[*]} $TESTS"
+ echo "# bats ${bats_opts[*]} ${bats_filter[*]} $TESTS"
sudo --preserve-env=PODMAN \
--preserve-env=QUADLET \
--preserve-env=PODMAN_TEST_DEBUG \
@@ -144,7 +147,7 @@ fi
# Rootless. (Only if we're not already root)
if [[ "$TEST_ROOTLESS" && "$(id -u)" != 0 ]]; then
echo "--------------------------------------------------"
- echo "\$ bats ${bats_filter[*]} $TESTS"
+ echo "\$ bats ${bats_opts[*]} ${bats_filter[*]} $TESTS"
bats "${bats_opts[@]}" "${bats_filter[@]}" $TESTS
rc=$((rc | $?))
fi
diff --git a/test/system/000-TEMPLATE b/test/system/000-TEMPLATE
index a6a9ca1418..cff5de7263 100644
--- a/test/system/000-TEMPLATE
+++ b/test/system/000-TEMPLATE
@@ -11,6 +11,22 @@ load helpers
args="some sort of argument list"
run_podman subcmd $args
assert "$output" == "what we expect" "output from 'podman subcmd $args'"
+
+ # safename() provides a lower-case string with both the BATS test number
+ # and a pseudorandom element. Its use is strongly encouraged for container
+ # names, volumes, networks, images, everything, because the test number
+ # makes it possible to find leaked elements.
+ cname="c-$(safename)"
+
+ # Run "top" in a detached container with a safe name
+ run_podman run -d --name $cname --this-option --that $IMAGE top
+
+ # A number ("17") as the first run_podman arg means "expect this exit code".
+ # By default, run_podman expects success, and will barf on nonzero status.
+ run_podman 17 run --this-option --that $IMAGE exit 17
+
+ # Give a hoot, don't pollute
+ run_podman rm -f -t0 $cname
}
# vim: filetype=sh
@@ -110,5 +126,31 @@ size | -\\\?[0-9]\\\+
done < <(parse_table "$tests")
}
+# Whenever possible, please add the ci:parallel tag to new tests,
+# not only for speed but for stress testing.
+#
+# This is an example of what NOT to do when enabling parallel tests.
+#
+# bats test_tags=ci:parallel
+@test "this test is completely broken in parallel" {
+ # Never use "--name HARDCODED". Define 'cname=c-$(safename)' instead.
+ # Not only does that guarantee uniqueness, it also gives the test number
+ # in the name, so if there's a leak (at end of tests) it will be possible
+ # to identify the culprit.
+ run_podman --name mycontainer $IMAGE top
+
+ # Same thing for build and -t
+ run_podman build -t myimage ...
+
+ # Never assume exact output from podman ps! Many other containers may be running.
+ run_podman ps
+ assert "$output" = "mycontainer"
+
+ # Never use "-l". It is meaningless when other processes are running.
+ run_podman container inspect -l
+
+ # Never 'rm -a'!!! OMG like seriously just don't.
+ run_podman rm -f -a
+}
# vim: filetype=sh
diff --git a/test/system/030-run.bats b/test/system/030-run.bats
index e254fe8183..117e90cc48 100644
--- a/test/system/030-run.bats
+++ b/test/system/030-run.bats
@@ -874,8 +874,9 @@ RUN rm /etc/mtab
EOF
expected="'/etc/mtab' -> '/proc/mounts'"
+ # --layers=false needed to work around buildah#5674 parallel flake
local iname=nomtab-$(safename)
- run_podman build -t $iname $tmpdir
+ run_podman build -t $iname --layers=false $tmpdir
run_podman run --rm $iname stat -c %N /etc/mtab
is "$output" "$expected" "/etc/mtab should be created"
@@ -1080,8 +1081,9 @@ echo -e "#!/bin/sh\nfalse" >> /usr/bin/nsenter; \
chmod +x /usr/bin/nsenter
EOF
+ # --layers=false needed to work around buildah#5674 parallel flake
test_image="cve_2022_1227_test-$(safename)"
- run_podman build -t $test_image $tmpbuilddir
+ run_podman build -t $test_image --layers=false $tmpbuilddir
run_podman run -d ${keepid} $test_image top
ctr="$output"
run_podman top $ctr huser,user
diff --git a/test/system/055-rm.bats b/test/system/055-rm.bats
index e9a23dba52..3673b7dc25 100644
--- a/test/system/055-rm.bats
+++ b/test/system/055-rm.bats
@@ -127,10 +127,10 @@ function __run_healthcheck_container() {
run_podman run -d --name $1 \
--health-cmd /bin/false \
--health-interval 1s \
- --health-retries 1 \
+ --health-retries 2 \
--health-timeout 1s \
--health-on-failure=stop \
- --stop-timeout=1 \
+ --stop-timeout=2 \
--health-start-period 0 \
--stop-signal SIGTERM \
$IMAGE sleep infinity
diff --git a/test/system/060-mount.bats b/test/system/060-mount.bats
index 3dbcd86312..792c777165 100644
--- a/test/system/060-mount.bats
+++ b/test/system/060-mount.bats
@@ -409,8 +409,9 @@ FROM $IMAGE
RUN mkdir /mountroot && echo ${datacontent[img]} > /mountroot/data
EOF
+ # --layers=false needed to work around buildah#5674 parallel flake
img="localhost/img-$(safename)"
- run_podman build -t $img -f $dockerfile
+ run_podman build -t $img --layers=false -f $dockerfile
# Each test is set up in exactly the same way:
#
@@ -554,7 +555,7 @@ glob | /* | /mountroot/ | in
echo "$datafile_contents" > $workdir/$datafile
ln -s $datafile $workdir/link
- run_podman create --mount type=glob,src=$workdir/*,dst=/mountroot/,no-dereference --privileged $img sh -c "stat -c '%N' /mountroot/link; cat /mountroot/link"
+ run_podman create --mount type=glob,src=$workdir/*,dst=/mountroot/,no-dereference --privileged $img sh -c "stat -c '%N' /mountroot/link; cat /mountroot/link; ls -l /mountroot"
cid="$output"
run_podman start -a $cid
assert "${lines[0]}" = "'/mountroot/link' -> '$datafile'" "symlink is preserved, on start"
diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats
index 7e01a5545b..1cf1f9c0b3 100644
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@@ -18,7 +18,12 @@ function _check_health {
local hc_status="$5"
# Loop-wait (up to a few seconds) for healthcheck event (#20342)
+ # Allow a margin when running parallel, because of system load
local timeout=5
+ if [[ -n "$PARALLEL_JOBSLOT" ]]; then
+ timeout=$((timeout + 3))
+ fi
+
while :; do
run_podman events --filter container=$ctrname --filter event=health_status \
--since "$since" --stream=false --format "{{.HealthStatus}}"
@@ -157,7 +162,7 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\"
# Wait for the container in the background and create the $wait_file to
# signal the specified wait condition was met.
- (timeout --foreground -v --kill=5 5 $PODMAN wait --condition=$condition $ctr && touch $wait_file) &
+ (timeout --foreground -v --kill=5 10 $PODMAN wait --condition=$condition $ctr && touch $wait_file) &
# Sleep 1 second to make sure above commands are running
sleep 1
diff --git a/test/system/252-quadlet.bats b/test/system/252-quadlet.bats
index f13f15169f..3ef297b869 100644
--- a/test/system/252-quadlet.bats
+++ b/test/system/252-quadlet.bats
@@ -428,12 +428,14 @@ EOF
# A quadlet container depends on a named quadlet volume
@test "quadlet - named volume dependency" {
+ local volume_name="v-$(safename)"
+
# Save the unit name to use as the volume for the container
local quadlet_vol_unit=dep_$(safename).volume
local quadlet_vol_file=$PODMAN_TMPDIR/${quadlet_vol_unit}
cat > $quadlet_vol_file < $quadlet_file <&3
- run_podman rmi --force "$1" >/dev/null 2>&1 || true
+ _run_podman_quiet rmi --force "$1"
# Tagged image will have same IID as our test image; don't rmi it.
if [[ $2 != "$PODMAN_TEST_IMAGE_ID" ]]; then
echo "# setup(): removing stray image $2" >&3
- run_podman rmi --force "$2" >/dev/null 2>&1 || true
+ _run_podman_quiet rmi --force "$2"
fi
fi
done
@@ -440,6 +457,17 @@ function clean_setup() {
if [[ -z "$found_needed_image" ]]; then
_prefetch $PODMAN_TEST_IMAGE_FQN
fi
+
+ # When running in parallel, load (create, actually) the pause image.
+ # This way, all pod tests will have it available. Without this,
+ # parallel pod tests will leave behind : images.
+ # FIXME: #23292 -- this should not be necessary.
+ if [[ -n "$PARALLEL_JOBSLOT" ]]; then
+ run_podman pod create mypod
+ run_podman pod rm mypod
+ # And now, we have a pause image, and each test does not
+ # need to build their own.
+ fi
}
# END setup/teardown tools
diff --git a/test/system/helpers.registry.bash b/test/system/helpers.registry.bash
index 643a0e5151..ff57ad8b87 100644
--- a/test/system/helpers.registry.bash
+++ b/test/system/helpers.registry.bash
@@ -17,8 +17,26 @@ unset REGISTRY_AUTH_FILE
# Start a local registry. Only needed on demand (e.g. by 150-login.bats)
# and then only once: if we start, leave it running until final teardown.
function start_registry() {
- if [[ -d "$PODMAN_LOGIN_WORKDIR/auth" ]]; then
- # Already started
+ AUTHDIR=${PODMAN_LOGIN_WORKDIR}/auth
+
+ local startflag=${PODMAN_LOGIN_WORKDIR}/OK
+
+ if ! mkdir $AUTHDIR; then
+ # *Possibly* already started. Or, possibly (when running
+ # parallel tests) another process is trying to start it.
+ # Give it some time.
+ local timeout=30
+ while [[ $timeout -gt 0 ]]; do
+ if [[ -e $startflag ]]; then
+ echo "Registry has already been started by another process"
+ return
+ fi
+
+ sleep 1
+ timeout=$((timeout - 1))
+ done
+
+ die "Internal error: timed out waiting for another process to start registry"
# Fixes very obscure corner case in root system tests:
# 1) we run 150-login tests, starting a registry; then
@@ -26,11 +44,15 @@ function start_registry() {
# 3) run 700-play, the "private" test, which needs the
# already-started registry, but its port is now DROPped,
# so the test times out trying to talk to registry
- run_podman --storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR}) network reload --all
+
+ ###### FIXME FIXME FIXME TEMPORARY!
+ ###### Trying to understand flake #23725. What happens if we stop
+ ###### doing the network reload?
+ ###### FIXME FIXME FIXME, should we do it in stop_registry??
+ ###### run_podman --storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR}) network reload --all
return
fi
- AUTHDIR=${PODMAN_LOGIN_WORKDIR}/auth
mkdir -p $AUTHDIR
# Registry image; copy of docker.io, but on our own registry
@@ -79,6 +101,9 @@ function start_registry() {
wait_for_port 127.0.0.1 ${PODMAN_LOGIN_REGISTRY_PORT}
# ...so we look in container logs for confirmation that registry is running.
_PODMAN_TEST_OPTS="${PODMAN_LOGIN_ARGS}" wait_for_output "listening on .::.:5000" $cid
+
+ touch $startflag
+ echo "I have started the registry"
}
function stop_registry() {
@@ -103,10 +128,10 @@ function stop_registry() {
mount | grep ${PODMAN_LOGIN_WORKDIR} | awk '{print $3}' | xargs --no-run-if-empty umount
if [[ $(id -u) -eq 0 ]]; then
- rm -rf ${PODMAN_LOGIN_WORKDIR}
+ rm -rf ${PODMAN_LOGIN_WORKDIR}/*
else
# rootless image data is owned by a subuid
- run_podman unshare rm -rf ${PODMAN_LOGIN_WORKDIR}
+ run_podman unshare rm -rf ${PODMAN_LOGIN_WORKDIR}/*
fi
fi
@@ -119,7 +144,7 @@ function stop_registry() {
echo ""
echo "lsof -i -P"
lsof -i -P
- die "Socket still seems open"
+ die "Socket $PODMAN_LOGIN_REGISTRY_PORT still seems open"
fi
}
diff --git a/test/system/setup_suite.bash b/test/system/setup_suite.bash
index 4dd59981db..8a920c6141 100644
--- a/test/system/setup_suite.bash
+++ b/test/system/setup_suite.bash
@@ -25,13 +25,19 @@ function setup_suite() {
# FIXME: racy! It could be many minutes between now and when we start it.
# To mitigate, we use a range not used anywhere else in system tests.
- export PODMAN_LOGIN_REGISTRY_PORT=$(random_free_port 42000-42999)
+ export PODMAN_LOGIN_REGISTRY_PORT=$(random_free_port 27000-27999)
# The above does not handle errors. Do a final confirmation.
assert "$PODMAN_LOGIN_REGISTRY_PORT" != "" \
"Unable to set PODMAN_LOGIN_REGISTRY_PORT"
clean_setup
+
+ # Canary file. Will be removed if any individual test fails.
+ touch "$BATS_SUITE_TMPDIR/all-tests-passed"
+
+ # Track network namespaces, so we can check for leaks at test end
+ ip netns list > $BATS_SUITE_TMPDIR/netns-pre
}
# Run at the very end of all tests. Useful for cleanup of non-BATS tmpdirs.
@@ -39,11 +45,22 @@ function teardown_suite() {
stop_registry
local exit_code=$?
- # After all tests make sure there are no leaks and cleanup if there are
- leak_check
- if [ $? -gt 0 ]; then
- exit_code=$((exit_code + 1))
- clean_setup
+ # At end, if all tests have passed, check for leaks.
+ # Don't do this if there were errors: failing tests may not clean up.
+ if [[ -e "$BATS_SUITE_TMPDIR/all-tests-passed" ]]; then
+ leak_check
+ if [ $? -gt 0 ]; then
+ exit_code=$((exit_code + 1))
+ fi
+
+ # Network namespace leak check. List should match what we saw above.
+ echo
+ ip netns list > $BATS_SUITE_TMPDIR/netns-post
+ if ! diff -u $BATS_SUITE_TMPDIR/netns-{pre,post}; then
+ echo
+ echo "^^^^^ Leaks found in /run/netns ^^^^^"
+ exit_code=$((exit_code + 1))
+ fi
fi
return $exit_code