diff --git a/.appveyor.yml b/.appveyor.yml
index 57fb43606..242702aba 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -19,4 +19,4 @@ install:
   - python -m pip install --user ".[dev]"
 
 test_script:
-  - python -m nose --tests benchexec.tablegenerator
+  - python -m pytest benchexec/tablegenerator/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 464ee1595..f391b3c49 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -50,7 +50,7 @@ stages:
   script:
     - sudo -u $PRIMARY_USER
         COVERAGE_PROCESS_START=.coveragerc
-        coverage run -m nose
+        coverage run -m pytest
   after_script:
     - sudo -u $PRIMARY_USER coverage combine
     - sudo -u $PRIMARY_USER coverage report
diff --git a/benchexec/tablegenerator/columns.py b/benchexec/tablegenerator/columns.py
index 3197b4975..042600b09 100644
--- a/benchexec/tablegenerator/columns.py
+++ b/benchexec/tablegenerator/columns.py
@@ -18,12 +18,9 @@
 
 __all__ = ["Column", "ColumnType", "ColumnMeasureType"]
 
-# This sets the rounding mode for all Decimal operations in the process.
-# It is actually used only as default context for new contexts, but because we set this
-# at import time and before any threads are started, it should work according to its
-# documentation. We double check with the context of the current thread.
-decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP
-assert decimal.getcontext().rounding == decimal.ROUND_HALF_UP
+# Every entry point / function that performs Decimal arithmetic must run inside a local
+# context created from DECIMAL_CONTEXT, so that the correct rounding mode is always used.
+DECIMAL_CONTEXT = decimal.Context(rounding=decimal.ROUND_HALF_UP)
 
 DEFAULT_TIME_PRECISION = 3
 DEFAULT_TOOLTIP_PRECISION = 2
@@ -132,32 +129,34 @@ def __init__(
         relevant_for_diff=None,
         display_title=None,
     ):
-        # If scaling on the variables is performed, a display unit must be defined, explicitly
-        if scale_factor is not None and scale_factor != 1 and unit is None:
-            raise util.TableDefinitionError(
-                f"Scale factor is defined, but display unit is not (in column {title})"
-            )
+        with decimal.localcontext(DECIMAL_CONTEXT):
 
-        self.title = title
-        self.pattern = pattern
-        self.number_of_significant_digits = (
-            int(num_of_digits) if num_of_digits else None
-        )
-        self.type = col_type
-        self.unit = unit
-        self.source_unit = source_unit
-        self.scale_factor = Decimal(scale_factor) if scale_factor else scale_factor
-        self.href = href
-        if relevant_for_diff is None:
-            self.relevant_for_diff = False
-        else:
-            self.relevant_for_diff = (
-                True if relevant_for_diff.lower() == "true" else False
+            # If a scale factor other than 1 is given, a display unit must be defined explicitly
+            if scale_factor is not None and scale_factor != 1 and unit is None:
+                raise util.TableDefinitionError(
+                    f"Scale factor is defined, but display unit is not (in column {title})"
+                )
+
+            self.title = title
+            self.pattern = pattern
+            self.number_of_significant_digits = (
+                int(num_of_digits) if num_of_digits else None
             )
-        self.display_title = display_title
+            self.type = col_type
+            self.unit = unit
+            self.source_unit = source_unit
+            self.scale_factor = Decimal(scale_factor) if scale_factor else scale_factor
+            self.href = href
+            if relevant_for_diff is None:
+                self.relevant_for_diff = False
+            else:
+                self.relevant_for_diff = (
+                    True if relevant_for_diff.lower() == "true" else False
+                )
+            self.display_title = display_title
 
-        # expected maximum width (in characters)
-        self.max_width = None
+            # expected maximum width (in characters)
+            self.max_width = None
 
     def is_numeric(self):
         return (
@@ -190,68 +189,73 @@ def format_value(self, value, format_target):
         @param format_target the target the value should be formatted for
         @return: a formatted String representation of the given value.
         """
-        # Only format counts and measures
-        if self.type.type != ColumnType.count and self.type.type != ColumnType.measure:
-            return value
-
-        if format_target not in POSSIBLE_FORMAT_TARGETS:
-            raise ValueError("Unknown format target")
-
-        if value is None or value == "":
-            return ""
-
-        if isinstance(value, str):
-            # If the number ends with "s" or another unit, remove it.
-            # Units should not occur in table cells, but in the table head.
-            number_str = util.remove_unit(value.strip())
-            number = Decimal(number_str)
-        elif isinstance(value, Decimal):
-            number = value
-            number_str = print_decimal(number)
-        else:
-            raise TypeError(f"Unexpected number type {type(value)}")
+        with decimal.localcontext(DECIMAL_CONTEXT):
 
-        if number.is_nan():
-            return "NaN"
-        elif number == inf:
-            return "Inf"
-        elif number == -inf:
-            return "-Inf"
+            # Only format counts and measures
+            if (
+                self.type.type != ColumnType.count
+                and self.type.type != ColumnType.measure
+            ):
+                return value
+
+            if format_target not in POSSIBLE_FORMAT_TARGETS:
+                raise ValueError("Unknown format target")
+
+            if value is None or value == "":
+                return ""
+
+            if isinstance(value, str):
+                # If the number ends with "s" or another unit, remove it.
+                # Units should not occur in table cells, but in the table head.
+                number_str = util.remove_unit(value.strip())
+                number = Decimal(number_str)
+            elif isinstance(value, Decimal):
+                number = value
+                number_str = print_decimal(number)
+            else:
+                raise TypeError(f"Unexpected number type {type(value)}")
 
-        # Apply the scale factor to the value
-        if self.scale_factor is not None:
-            number *= self.scale_factor
-        assert number.is_finite()
+            if number.is_nan():
+                return "NaN"
+            elif number == inf:
+                return "Inf"
+            elif number == -inf:
+                return "-Inf"
 
-        if (
-            self.number_of_significant_digits is None
-            and self.type.type != ColumnType.measure
-            and format_target == "tooltip_stochastic"
-        ):
-            # Column of type count (integral values) without specified sig. digits.
-            # However, we need to round values like stdev, so we just round somehow.
-            return print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION))
+            # Apply the scale factor to the value
+            if self.scale_factor is not None:
+                number *= self.scale_factor
+            assert number.is_finite()
 
-        number_of_significant_digits = self.get_number_of_significant_digits(
-            format_target
-        )
-        max_dec_digits = (
-            self.type.max_decimal_digits
-            if isinstance(self.type, ColumnMeasureType)
-            else 0
-        )
+            if (
+                self.number_of_significant_digits is None
+                and self.type.type != ColumnType.measure
+                and format_target == "tooltip_stochastic"
+            ):
+                # Column of type count (integral values) without specified sig. digits.
+                # However, we need to round values like stdev, so we just round somehow.
+                return print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION))
 
-        if number_of_significant_digits is not None:
-            current_significant_digits = _get_significant_digits(number_str)
-            return _format_number(
-                number,
-                current_significant_digits,
-                number_of_significant_digits,
-                max_dec_digits,
-                format_target,
+            number_of_significant_digits = self.get_number_of_significant_digits(
+                format_target
             )
-        else:
-            return print_decimal(number)
+            max_dec_digits = (
+                self.type.max_decimal_digits
+                if isinstance(self.type, ColumnMeasureType)
+                else 0
+            )
+
+            if number_of_significant_digits is not None:
+                current_significant_digits = _get_significant_digits(number_str)
+                return _format_number(
+                    number,
+                    current_significant_digits,
+                    number_of_significant_digits,
+                    max_dec_digits,
+                    format_target,
+                )
+            else:
+                return print_decimal(number)
 
     def set_column_type_from(self, column_values):
         """
@@ -302,49 +306,54 @@ def __str__(self):
 
 
 def _format_number_align(formattedValue, max_number_of_dec_digits):
-    alignment = max_number_of_dec_digits
+    with decimal.localcontext(DECIMAL_CONTEXT):
+        alignment = max_number_of_dec_digits
 
-    if formattedValue.find(".") >= 0:
-        # Subtract spaces for digits after the decimal point.
-        alignment -= len(formattedValue) - formattedValue.find(".") - 1
-    elif max_number_of_dec_digits > 0:
-        # Add punctuation space.
-        formattedValue += "&#x2008;"
+        if formattedValue.find(".") >= 0:
+            # Subtract spaces for digits after the decimal point.
+            alignment -= len(formattedValue) - formattedValue.find(".") - 1
+        elif max_number_of_dec_digits > 0:
+            # Add punctuation space.
+            formattedValue += "&#x2008;"
 
-    return formattedValue + ("&#x2007;" * alignment)
+        return formattedValue + ("&#x2007;" * alignment)
 
 
 def _get_significant_digits(value):
-    if not Decimal(value).is_finite():
-        return 0
-
-    # Regular expression returns multiple groups:
-    #
-    # Group GROUP_SIGN: Optional sign of value
-    # Group GROUP_INT_PART: Digits in front of decimal point
-    # Group GROUP_DEC_PART: Optional decimal point and digits after it
-    # Group GROUP_SIG_DEC_DIGITS: Digits after decimal point, starting at the first value not 0
-    # Group GROUP_EXP: Optional exponent part (e.g. 'e-5')
-    # Group GROUP_EXP_SIGN: Optional sign of exponent part
-    # Group GROUP_EXP_VALUE: Value of exponent part (e.g. '5' for 'e-5')
-    # Use these groups to compute the number of zeros that have to be added to the current number's
-    # decimal positions.
-    match = REGEX_MEASURE.match(value)
-    assert match, "unexpected output format for number formatting"
-
-    if int(match.group(GROUP_INT_PART)) == 0 and Decimal(value) != 0:
-        sig_digits = len(match.group(GROUP_SIG_DEC_PART))
+    with decimal.localcontext(DECIMAL_CONTEXT):
+
+        if not Decimal(value).is_finite():
+            return 0
+
+        # Regular expression returns multiple groups:
+        #
+        # Group GROUP_SIGN: Optional sign of value
+        # Group GROUP_INT_PART: Digits in front of decimal point
+        # Group GROUP_DEC_PART: Optional decimal point and digits after it
+        # Group GROUP_SIG_DEC_PART: Digits after decimal point, starting at the first non-zero digit
+        # Group GROUP_EXP: Optional exponent part (e.g. 'e-5')
+        # Group GROUP_EXP_SIGN: Optional sign of exponent part
+        # Group GROUP_EXP_VALUE: Value of exponent part (e.g. '5' for 'e-5')
+        # Use these groups to compute the number of zeros that have to be added to the current number's
+        # decimal positions.
+        match = REGEX_MEASURE.match(value)
+        assert match, "unexpected output format for number formatting"
+
+        if int(match.group(GROUP_INT_PART)) == 0 and Decimal(value) != 0:
+            sig_digits = len(match.group(GROUP_SIG_DEC_PART))
 
-    else:
-        if Decimal(value) != 0:
-            sig_digits = len(match.group(GROUP_INT_PART))
         else:
-            # If the value consists of only zeros, do not count the 0 in front of the decimal
-            sig_digits = 0
-        if match.group(GROUP_DEC_PART):
-            sig_digits += len(match.group(GROUP_DEC_PART)) - 1  # -1 for decimal point
+            if Decimal(value) != 0:
+                sig_digits = len(match.group(GROUP_INT_PART))
+            else:
+                # If the value consists of only zeros, do not count the 0 in front of the decimal point
+                sig_digits = 0
+            if match.group(GROUP_DEC_PART):
+                sig_digits += (
+                    len(match.group(GROUP_DEC_PART)) - 1
+                )  # -1 for decimal point
 
-    return sig_digits
+        return sig_digits
 
 
 def _format_number(
@@ -360,60 +369,69 @@ def _format_number(
     with the specified number of significant digits,
     optionally aligned at the decimal point.
     """
-    assert format_target in POSSIBLE_FORMAT_TARGETS, "Invalid format " + format_target
+    with decimal.localcontext(DECIMAL_CONTEXT):
+
+        assert format_target in POSSIBLE_FORMAT_TARGETS, (
+            "Invalid format " + format_target
+        )
 
-    if number == 0:
-        intended_digits = min(number_of_significant_digits, initial_value_sig_digits)
-        # Add as many trailing zeros as desired
-        rounded_value = Decimal(0).scaleb(-intended_digits)
+        if number == 0:
+            intended_digits = min(
+                number_of_significant_digits, initial_value_sig_digits
+            )
+            # Add as many trailing zeros as desired
+            rounded_value = Decimal(0).scaleb(-intended_digits)
 
-    else:
-        # Round to the given amount of significant digits
-        intended_digits = min(initial_value_sig_digits, number_of_significant_digits)
-
-        assert number.adjusted() == int(floor(abs(number).log10()))
-        rounding_point = -number.adjusted() + (intended_digits - 1)
-        # Contrary to its documentation, round() seems to be affected by the rounding
-        # mode of decimal's context (which is good for us) when rounding Decimals.
-        # We add an assertion to double check (calling round() is easier to understand).
-        rounded_value = round(number, rounding_point)
-        assert rounded_value == number.quantize(Decimal(1).scaleb(-rounding_point))
-
-    formatted_value = print_decimal(rounded_value)
-
-    # Get the number of resulting significant digits.
-    current_sig_digits = _get_significant_digits(formatted_value)
-
-    if current_sig_digits > intended_digits:
-        if "." in formatted_value:
-            # Happens when rounding 9.99 to 10 with 2 significant digits,
-            # the formatted_value will be 10.0 and we need to cut one trailing zero.
-            assert current_sig_digits == intended_digits + 1
-            assert formatted_value.endswith("0")
-            formatted_value = formatted_value[:-1].rstrip(".")
         else:
-            # happens for cases like 12300 with 3 significant digits
-            assert formatted_value == str(round(rounded_value))
-    else:
-        assert current_sig_digits == intended_digits
-
-    # Cut the 0 in front of the decimal point for values < 1.
-    # Example: 0.002 => .002
-    if _is_to_cut(formatted_value, format_target):
-        assert formatted_value.startswith("0.")
-        formatted_value = formatted_value[1:]
-
-    # Alignment
-    if format_target == "html_cell":
-        formatted_value = _format_number_align(
-            formatted_value, max_digits_after_decimal
-        )
-    return formatted_value
+            # Round to the given amount of significant digits
+            intended_digits = min(
+                initial_value_sig_digits, number_of_significant_digits
+            )
+
+            assert number.adjusted() == int(floor(abs(number).log10()))
+            rounding_point = -number.adjusted() + (intended_digits - 1)
+            # Contrary to its documentation, round() seems to be affected by the rounding
+            # mode of decimal's context (which is good for us) when rounding Decimals.
+            # We add an assertion to double check (calling round() is easier to understand).
+            rounded_value = round(number, rounding_point)
+            assert rounded_value == number.quantize(Decimal(1).scaleb(-rounding_point))
+
+        formatted_value = print_decimal(rounded_value)
+
+        # Get the number of resulting significant digits.
+        current_sig_digits = _get_significant_digits(formatted_value)
+
+        if current_sig_digits > intended_digits:
+            if "." in formatted_value:
+                # Happens when rounding 9.99 to 10 with 2 significant digits,
+                # the formatted_value will be 10.0 and we need to cut one trailing zero.
+                assert current_sig_digits == intended_digits + 1
+                assert formatted_value.endswith("0")
+                formatted_value = formatted_value[:-1].rstrip(".")
+            else:
+                # happens for cases like 12300 with 3 significant digits
+                assert formatted_value == str(round(rounded_value))
+        else:
+            assert current_sig_digits == intended_digits
+
+        # Cut the 0 in front of the decimal point for values < 1.
+        # Example: 0.002 => .002
+        if _is_to_cut(formatted_value, format_target):
+            assert formatted_value.startswith("0.")
+            formatted_value = formatted_value[1:]
+
+        # Alignment
+        if format_target == "html_cell":
+            formatted_value = _format_number_align(
+                formatted_value, max_digits_after_decimal
+            )
+        return formatted_value
 
 
 def _is_to_cut(value, format_target):
-    correct_target = format_target == "html_cell"
-    return correct_target and "." in value and 1 > Decimal(value) >= 0
+    with decimal.localcontext(DECIMAL_CONTEXT):
+        correct_target = format_target == "html_cell"
+        return correct_target and "." in value and 1 > Decimal(value) >= 0
 
 
 def _get_column_type_heur(
@@ -422,132 +440,139 @@ def _get_column_type_heur(
     ColumnType,
     Tuple[Union[ColumnType, ColumnMeasureType], str, str, Union[int, Decimal], int],
 ]:
-    if column.title == "status":
-        return ColumnType.status
-
-    column_type = column.type or None
-    if column_type and column_type.type == ColumnType.measure:
-        column_type = ColumnMeasureType(0)
-    column_unit = column.unit  # May be None
-    column_source_unit = column.source_unit  # May be None
-    column_scale_factor = column.scale_factor  # May be None
-
-    column_max_int_digits = 0
-    column_max_dec_digits = 0
-    column_has_numbers = False
-    column_has_decimal_numbers = False
-
-    if column_unit:
-        explicit_unit_defined = True
-    else:
-        explicit_unit_defined = False
+    with decimal.localcontext(DECIMAL_CONTEXT):
+        if column.title == "status":
+            return ColumnType.status
+
+        column_type = column.type or None
+        if column_type and column_type.type == ColumnType.measure:
+            column_type = ColumnMeasureType(0)
+        column_unit = column.unit  # May be None
+        column_source_unit = column.source_unit  # May be None
+        column_scale_factor = column.scale_factor  # May be None
+
+        column_max_int_digits = 0
+        column_max_dec_digits = 0
+        column_has_numbers = False
+        column_has_decimal_numbers = False
+
+        if column_unit:
+            explicit_unit_defined = True
+        else:
+            explicit_unit_defined = False
 
-    if column_scale_factor is None:
-        explicit_scale_defined = False
-    else:
-        explicit_scale_defined = True
+        if column_scale_factor is None:
+            explicit_scale_defined = False
+        else:
+            explicit_scale_defined = True
 
-    for value in column_values:
-        if value is None or value == "":
-            continue
+        for value in column_values:
+            if value is None or value == "":
+                continue
 
-        value_match = REGEX_MEASURE.match(str(value))
+            value_match = REGEX_MEASURE.match(str(value))
 
-        # As soon as one row's value is no number, the column type is 'text'
-        if value_match is None:
-            return ColumnType.text
-        else:
-            column_has_numbers = True
-            curr_column_unit = value_match.group(GROUP_UNIT)
-
-            # If the units in two different rows of the same column differ,
-            # 1. Raise an error if an explicit unit is defined by the displayUnit attribute
-            #    and the unit in the column cell differs from the defined sourceUnit, or
-            # 2. Handle the column as 'text' type, if no displayUnit was defined for the column's values.
-            #    In that case, a unit different from the definition of sourceUnit does not lead to an error.
-            if curr_column_unit:
-                if column_source_unit is None and not explicit_scale_defined:
-                    column_source_unit = curr_column_unit
-                elif column_source_unit != curr_column_unit:
-                    raise util.TableDefinitionError(
-                        f"Attribute sourceUnit different from real source unit: "
-                        f"{column_source_unit} and {curr_column_unit} (in column {column.title})"
-                    )
-                if column_unit and curr_column_unit != column_unit:
-                    if explicit_unit_defined:
-                        _check_unit_consistency(
-                            curr_column_unit, column_source_unit, column
+            # As soon as one row's value is no number, the column type is 'text'
+            if value_match is None:
+                return ColumnType.text
+            else:
+                column_has_numbers = True
+                curr_column_unit = value_match.group(GROUP_UNIT)
+
+                # If the units in two different rows of the same column differ,
+                # 1. Raise an error if an explicit unit is defined by the displayUnit attribute
+                #    and the unit in the column cell differs from the defined sourceUnit, or
+                # 2. Handle the column as 'text' type, if no displayUnit was defined for the column's values.
+                #    In that case, a unit different from the definition of sourceUnit does not lead to an error.
+                if curr_column_unit:
+                    if column_source_unit is None and not explicit_scale_defined:
+                        column_source_unit = curr_column_unit
+                    elif column_source_unit != curr_column_unit:
+                        raise util.TableDefinitionError(
+                            f"Attribute sourceUnit different from real source unit: "
+                            f"{column_source_unit} and {curr_column_unit} (in column {column.title})"
                         )
+                    if column_unit and curr_column_unit != column_unit:
+                        if explicit_unit_defined:
+                            _check_unit_consistency(
+                                curr_column_unit, column_source_unit, column
+                            )
+                        else:
+                            return ColumnType.text
                     else:
-                        return ColumnType.text
-                else:
-                    column_unit = curr_column_unit
+                        column_unit = curr_column_unit
+
+                if column_scale_factor is None:
+                    column_scale_factor = _get_scale_factor(
+                        column_unit, column_source_unit, column
+                    )
 
-            if column_scale_factor is None:
-                column_scale_factor = _get_scale_factor(
-                    column_unit, column_source_unit, column
+                # Compute the number of decimal digits of the current value, considering the number of significant
+                # digits for this column.
+                # Use the column's scale factor for computing the decimal digits of the current value.
+                # Otherwise, they might be different from output.
+                scaled_value = (
+                    Decimal(util.remove_unit(str(value))) * column_scale_factor
                 )
 
-            # Compute the number of decimal digits of the current value, considering the number of significant
-            # digits for this column.
-            # Use the column's scale factor for computing the decimal digits of the current value.
-            # Otherwise, they might be different from output.
-            scaled_value = Decimal(util.remove_unit(str(value))) * column_scale_factor
-
-            # Due to the scaling operation above, floats in the exponent notation may be created. Since this creates
-            # special cases, immediately convert the value back to decimal notation.
-            if value_match.group(GROUP_DEC_PART):
-                # -1 since GROUP_DEC_PART includes the decimal point
-                dec_digits_before_scale = len(value_match.group(GROUP_DEC_PART)) - 1
-            else:
-                dec_digits_before_scale = 0
-            max_number_of_dec_digits_after_scale = max(
-                0, dec_digits_before_scale - ceil(log10(column_scale_factor))
-            )
+                # Due to the scaling operation above, values in exponent notation may be created. Since this creates
+                # special cases, immediately convert the value back to decimal notation.
+                if value_match.group(GROUP_DEC_PART):
+                    # -1 since GROUP_DEC_PART includes the decimal point
+                    dec_digits_before_scale = len(value_match.group(GROUP_DEC_PART)) - 1
+                else:
+                    dec_digits_before_scale = 0
+                max_number_of_dec_digits_after_scale = max(
+                    0, dec_digits_before_scale - ceil(log10(column_scale_factor))
+                )
 
-            scaled_value = f"{scaled_value:.{max_number_of_dec_digits_after_scale}f}"
-            scaled_value_match = REGEX_MEASURE.match(scaled_value)
-            assert scaled_value_match, "unexpected output format for number formatting"
+                scaled_value = (
+                    f"{scaled_value:.{max_number_of_dec_digits_after_scale}f}"
+                )
+                scaled_value_match = REGEX_MEASURE.match(scaled_value)
+                assert (
+                    scaled_value_match
+                ), "unexpected output format for number formatting"
 
-            curr_dec_digits = _get_decimal_digits(
-                scaled_value_match, column.number_of_significant_digits
-            )
-            column_max_dec_digits = max(column_max_dec_digits, curr_dec_digits)
+                curr_dec_digits = _get_decimal_digits(
+                    scaled_value_match, column.number_of_significant_digits
+                )
+                column_max_dec_digits = max(column_max_dec_digits, curr_dec_digits)
 
-            curr_int_digits = _get_int_digits(scaled_value_match)
-            column_max_int_digits = max(column_max_int_digits, curr_int_digits)
+                curr_int_digits = _get_int_digits(scaled_value_match)
+                column_max_int_digits = max(column_max_int_digits, curr_int_digits)
 
-            if (
-                scaled_value_match.group(GROUP_DEC_PART) is not None
-                or value_match.group(GROUP_DEC_PART) is not None
-                or scaled_value_match.group(GROUP_SPECIAL_FLOATS_PART) is not None
-            ):
-                column_has_decimal_numbers = True
+                if (
+                    scaled_value_match.group(GROUP_DEC_PART) is not None
+                    or value_match.group(GROUP_DEC_PART) is not None
+                    or scaled_value_match.group(GROUP_SPECIAL_FLOATS_PART) is not None
+                ):
+                    column_has_decimal_numbers = True
 
-    if not column_has_numbers:
-        # only empty values
-        return ColumnType.text
+        if not column_has_numbers:
+            # only empty values
+            return ColumnType.text
 
-    if (
-        column_has_decimal_numbers
-        or column_max_dec_digits
-        or int(column_scale_factor) != column_scale_factor  # non-int scaling factor
-    ):
-        column_type = ColumnMeasureType(column_max_dec_digits)
-    else:
-        column_type = ColumnType.count
+        if (
+            column_has_decimal_numbers
+            or column_max_dec_digits
+            or int(column_scale_factor) != column_scale_factor  # non-int scaling factor
+        ):
+            column_type = ColumnMeasureType(column_max_dec_digits)
+        else:
+            column_type = ColumnType.count
 
-    column_width = column_max_int_digits
-    if column_max_dec_digits:
-        column_width += column_max_dec_digits + 1
+        column_width = column_max_int_digits
+        if column_max_dec_digits:
+            column_width += column_max_dec_digits + 1
 
-    return (
-        column_type,
-        column_unit,
-        column_source_unit,
-        column_scale_factor,
-        column_width,
-    )
+        return (
+            column_type,
+            column_unit,
+            column_source_unit,
+            column_scale_factor,
+            column_width,
+        )
 
 
 # This function assumes that scale_factor is not defined.
@@ -580,44 +605,46 @@ def _get_decimal_digits(decimal_number_match, number_of_significant_digits):
     @return: the number of decimal digits of the given decimal number match's representation, after expanding
         the number to the required amount of significant digits
     """
-    # check that only decimal notation is used
-    assert "e" not in decimal_number_match.group()
-
-    try:
-        num_of_digits = int(number_of_significant_digits)
-    except TypeError:
-        num_of_digits = DEFAULT_NUMBER_OF_SIGNIFICANT_DIGITS
-
-    if not decimal_number_match.group(GROUP_DEC_PART):
-        return 0
-
-    # If 1 > value > 0, only look at the decimal digits.
-    # In the second condition, we have to remove the first character from the decimal part group because the
-    # first character always is '.'
-    if (
-        int(decimal_number_match.group(GROUP_INT_PART)) == 0
-        and int(decimal_number_match.group(GROUP_DEC_PART)[1:]) != 0
-    ):
-        max_num_of_digits = len(decimal_number_match.group(GROUP_SIG_DEC_PART))
-        num_of_digits = min(num_of_digits, max_num_of_digits)
-        # number of needed decimal digits = number of zeroes after decimal point + significant digits
-        curr_dec_digits = len(decimal_number_match.group(GROUP_ZEROES)) + int(
-            num_of_digits
-        )
+    with decimal.localcontext(DECIMAL_CONTEXT):
 
-    else:
-        max_num_of_digits = (
-            len(decimal_number_match.group(GROUP_INT_PART))
-            + len(decimal_number_match.group(GROUP_DEC_PART))
-            - 1  # for decimal point, which is guaranteed to exist at this point
-        )
-        num_of_digits = min(num_of_digits, max_num_of_digits)
-        # number of needed decimal digits = significant digits - number of digits in front of decimal point
-        curr_dec_digits = int(num_of_digits) - len(
-            decimal_number_match.group(GROUP_INT_PART)
-        )
+        # check that only decimal notation is used
+        assert "e" not in decimal_number_match.group()
+
+        try:
+            num_of_digits = int(number_of_significant_digits)
+        except TypeError:
+            num_of_digits = DEFAULT_NUMBER_OF_SIGNIFICANT_DIGITS
+
+        if not decimal_number_match.group(GROUP_DEC_PART):
+            return 0
+
+        # If 1 > value > 0, only look at the decimal digits.
+        # In the second condition, we have to remove the first character from the decimal part group because the
+        # first character always is '.'
+        if (
+            int(decimal_number_match.group(GROUP_INT_PART)) == 0
+            and int(decimal_number_match.group(GROUP_DEC_PART)[1:]) != 0
+        ):
+            max_num_of_digits = len(decimal_number_match.group(GROUP_SIG_DEC_PART))
+            num_of_digits = min(num_of_digits, max_num_of_digits)
+            # number of needed decimal digits = number of zeroes after decimal point + significant digits
+            curr_dec_digits = len(decimal_number_match.group(GROUP_ZEROES)) + int(
+                num_of_digits
+            )
+
+        else:
+            max_num_of_digits = (
+                len(decimal_number_match.group(GROUP_INT_PART))
+                + len(decimal_number_match.group(GROUP_DEC_PART))
+                - 1  # for decimal point, which is guaranteed to exist at this point
+            )
+            num_of_digits = min(num_of_digits, max_num_of_digits)
+            # number of needed decimal digits = significant digits - number of digits in front of decimal point
+            curr_dec_digits = int(num_of_digits) - len(
+                decimal_number_match.group(GROUP_INT_PART)
+            )
 
-    return curr_dec_digits
+        return curr_dec_digits
 
 
 def _get_int_digits(decimal_number_match):
@@ -625,11 +652,12 @@ def _get_int_digits(decimal_number_match):
     Returns the amount of integer digits of the given regex match.
     @param number_of_significant_digits: the number of significant digits required
     """
-    int_part = decimal_number_match.group(GROUP_INT_PART) or ""
-    if int_part == "0":
-        # we skip leading zeros of numbers < 1
-        int_part = ""
-    return len(int_part)
+    with decimal.localcontext(DECIMAL_CONTEXT):
+        int_part = decimal_number_match.group(GROUP_INT_PART) or ""
+        if int_part == "0":
+            # we skip leading zeros of numbers < 1
+            int_part = ""
+        return len(int_part)
 
 
 def _check_unit_consistency(actual_unit, wanted_unit, column):
diff --git a/benchexec/tablegenerator/statistics.py b/benchexec/tablegenerator/statistics.py
index d22fef02a..4a0528a29 100644
--- a/benchexec/tablegenerator/statistics.py
+++ b/benchexec/tablegenerator/statistics.py
@@ -14,6 +14,9 @@
 from benchexec.tablegenerator import util
 from benchexec.tablegenerator.columns import ColumnType
 
+# It is important that *all* entry points / methods that perform arithmetic use the correct
+# rounding mode, by running inside a local context: decimal.localcontext(DECIMAL_CONTEXT).
+DECIMAL_CONTEXT = decimal.Context(rounding=decimal.ROUND_HALF_UP)
 
 nan = Decimal("nan")
 inf = Decimal("inf")
@@ -65,55 +68,56 @@ def __str__(self):
 
     @classmethod
     def from_list(cls, values):
-        if any(v is not None and v.is_nan() for v in values):
-            return StatValue(nan, nan, nan, nan, nan, nan)
-
-        values = sorted(v for v in values if v is not None)
-        if not values:
-            return None
-
-        values_len = len(values)
-        min_value = values[0]
-        max_value = values[-1]
-
-        if min_value == -inf and max_value == +inf:
-            values_sum = nan
-            mean = nan
-            stdev = nan
-        elif max_value == inf:
-            values_sum = inf
-            mean = inf
-            stdev = inf
-        elif min_value == -inf:
-            values_sum = -inf
-            mean = -inf
-            stdev = inf
-        else:
-            values_sum = sum(values)
-            mean = values_sum / values_len
-
-            # The scaling is just to avoid having too few decimal digits when printing,
-            # the value is still just 0.
-            stdev = Decimal(0).scaleb(-decimal.getcontext().prec)
-            for v in values:
-                diff = v - mean
-                stdev += diff * diff
-            stdev = (stdev / values_len).sqrt()
-
-        half, len_is_odd = divmod(values_len, 2)
-        if len_is_odd:
-            median = values[half]
-        else:
-            median = (values[half - 1] + values[half]) / Decimal(2)
-
-        return StatValue(
-            values_sum,
-            min=min_value,
-            max=max_value,
-            avg=mean,
-            median=median,
-            stdev=stdev,
-        )
+        with decimal.localcontext(DECIMAL_CONTEXT):
+            if any(v is not None and v.is_nan() for v in values):
+                return StatValue(nan, nan, nan, nan, nan, nan)
+
+            values = sorted(v for v in values if v is not None)
+            if not values:
+                return None
+
+            values_len = len(values)
+            min_value = values[0]
+            max_value = values[-1]
+
+            if min_value == -inf and max_value == +inf:
+                values_sum = nan
+                mean = nan
+                stdev = nan
+            elif max_value == inf:
+                values_sum = inf
+                mean = inf
+                stdev = inf
+            elif min_value == -inf:
+                values_sum = -inf
+                mean = -inf
+                stdev = inf
+            else:
+                values_sum = sum(values)
+                mean = values_sum / values_len
+
+                # The scaling is just to avoid having too few decimal digits when printing,
+                # the value is still just 0.
+                stdev = Decimal(0).scaleb(-decimal.getcontext().prec)
+                for v in values:
+                    diff = v - mean
+                    stdev += diff * diff
+                stdev = (stdev / values_len).sqrt()
+
+            half, len_is_odd = divmod(values_len, 2)
+            if len_is_odd:
+                median = values[half]
+            else:
+                median = (values[half - 1] + values[half]) / Decimal(2)
+
+            return StatValue(
+                values_sum,
+                min=min_value,
+                max=max_value,
+                avg=mean,
+                median=median,
+                stdev=stdev,
+            )
 
 
 def get_stats_of_run_set(runResults, correct_only):
diff --git a/benchexec/tablegenerator/test_columns.py b/benchexec/tablegenerator/test_columns.py
index b1422a4e7..e436858cd 100644
--- a/benchexec/tablegenerator/test_columns.py
+++ b/benchexec/tablegenerator/test_columns.py
@@ -182,6 +182,15 @@ def test_format_value_small_value(self):
         formatted_value_aligned = small_value_column.format_value("2", "html_cell")
         self.assertEqual(formatted_value_aligned, ".0000000002&#x2007;&#x2007;")
 
+    def test_invalid_rounding_mode(self):
+        import decimal
+
+        # Formatting must use ROUND_HALF_UP regardless of the caller's decimal context.
+        with decimal.localcontext() as ctx:
+            ctx.rounding = decimal.ROUND_HALF_DOWN
+            value = self.measure_column.format_value("5.7715", *self.default_optionals)
+        self.assertEqual(value, "5.772")
+
     def test_format_value_align_int(self):
         formatted_value_int_aligned = self.measure_column.format_value(
             "20", "html_cell"
diff --git a/benchexec/tablegenerator/test_integration/__init__.py b/benchexec/tablegenerator/test_integration/__init__.py
index 2695b9ca4..239611e8a 100644
--- a/benchexec/tablegenerator/test_integration/__init__.py
+++ b/benchexec/tablegenerator/test_integration/__init__.py
@@ -17,8 +17,6 @@
 import benchexec.util
 import benchexec.tablegenerator.util
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 here = os.path.relpath(os.path.dirname(__file__))
 base_dir = os.path.join(here, "..", "..", "..")
 bin_dir = os.path.join(base_dir, "bin")
@@ -38,11 +36,6 @@ class TableGeneratorIntegrationTests(unittest.TestCase):
     # Tests compare the generated CSV files and ignore the HTML files
     # because we assume the HTML files change more often on purpose.
 
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
-
     def setUp(self):
         # We use a temporary directory inside the source tree to avoid mismatching
         # path names inside HTML tables.
diff --git a/benchexec/tablegenerator/test_statvalue.py b/benchexec/tablegenerator/test_statvalue.py
index 4b8344f44..3557e7b89 100644
--- a/benchexec/tablegenerator/test_statvalue.py
+++ b/benchexec/tablegenerator/test_statvalue.py
@@ -6,20 +6,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from decimal import Decimal
-import sys
 import unittest
 
 from benchexec.tablegenerator.statistics import StatValue
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 
 class TestStatValue(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
-
     def test_empty(self):
         self.assertIsNone(StatValue.from_list([]))
 
diff --git a/benchexec/tablegenerator/test_util.py b/benchexec/tablegenerator/test_util.py
index f7709c954..bd469e831 100644
--- a/benchexec/tablegenerator/test_util.py
+++ b/benchexec/tablegenerator/test_util.py
@@ -6,19 +6,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from decimal import Decimal
-import sys
 import unittest
 
 from benchexec.tablegenerator import util
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 
 class TestUnit(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
 
     def assertEqualNumberAndUnit(self, value, number, unit):
         self.assertEqual(util.split_number_and_unit(value), (number, unit))
diff --git a/benchexec/test_analyze_run_result.py b/benchexec/test_analyze_run_result.py
index 4c9461659..1edd7584a 100644
--- a/benchexec/test_analyze_run_result.py
+++ b/benchexec/test_analyze_run_result.py
@@ -5,8 +5,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import logging
-import sys
 import unittest
 import types
 
@@ -20,17 +18,10 @@
 )
 from benchexec.tools.template import BaseTool
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 normal_result = ProcessExitCode(raw=0, value=0, signal=None)
 
 
 class TestResult(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        logging.disable(logging.CRITICAL)
-
     def create_run(self, info_result=RESULT_UNKNOWN):
         runSet = types.SimpleNamespace()
         runSet.log_folder = "."
diff --git a/benchexec/test_benchmark_definition.py b/benchexec/test_benchmark_definition.py
index 221de868e..0ccf07fb6 100644
--- a/benchexec/test_benchmark_definition.py
+++ b/benchexec/test_benchmark_definition.py
@@ -69,10 +69,6 @@ class TestBenchmarkDefinition(unittest.TestCase):
     testing mostly the classes from benchexec.model.
     """
 
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-
     @patch("benchexec.model.load_task_definition_file", new=mock_load_task_def_file)
     @patch("benchexec.result.Property.create", new=mock_property_create)
     @patch("benchexec.util.expand_filename_pattern", new=mock_expand_filename_pattern)
diff --git a/benchexec/test_cgroups.py b/benchexec/test_cgroups.py
index 1fbf927eb..9b2171629 100644
--- a/benchexec/test_cgroups.py
+++ b/benchexec/test_cgroups.py
@@ -5,23 +5,13 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import logging
 import subprocess
-import sys
 import unittest
 
 from benchexec import check_cgroups
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 
 class TestCheckCgroups(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
-        logging.disable(logging.CRITICAL)
-
     def execute_run_extern(self, *args, **kwargs):
         try:
             return subprocess.check_output(
diff --git a/benchexec/test_core_assignment.py b/benchexec/test_core_assignment.py
index 4e6d14adb..64e8ecfb2 100644
--- a/benchexec/test_core_assignment.py
+++ b/benchexec/test_core_assignment.py
@@ -6,25 +6,17 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import itertools
-import logging
-import sys
 import unittest
 import math
 
 from benchexec.resources import _get_cpu_cores_per_run0
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 
 def lrange(start, end):
     return list(range(start, end))
 
 
 class TestCpuCoresPerRun(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        logging.disable(logging.CRITICAL)
 
     def assertValid(self, coreLimit, num_of_threads, expectedResult=None):
         result = _get_cpu_cores_per_run0(
diff --git a/benchexec/test_integration/__init__.py b/benchexec/test_integration/__init__.py
index 64356cc8c..98bdbdb3e 100644
--- a/benchexec/test_integration/__init__.py
+++ b/benchexec/test_integration/__init__.py
@@ -10,15 +10,12 @@
 import os
 import shutil
 import subprocess
-import sys
 import tempfile
 import unittest
 import zipfile
 
 from xml.etree import ElementTree
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 here = os.path.dirname(__file__)
 base_dir = os.path.join(here, "..", "..")
 bin_dir = os.path.join(base_dir, "bin")
@@ -45,10 +42,6 @@
 
 
 class BenchExecIntegrationTests(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
 
     def _build_tmp_dir(self):
         """
diff --git a/benchexec/test_pqos.py b/benchexec/test_pqos.py
index eb00cbcaf..6ffaf0936 100644
--- a/benchexec/test_pqos.py
+++ b/benchexec/test_pqos.py
@@ -10,13 +10,11 @@
 """
 import json
 import copy
-import logging
 import unittest
 from subprocess import CalledProcessError
 from unittest.mock import patch, MagicMock
 from benchexec.pqos import Pqos
 
-
 mock_pqos_wrapper_output = {
     "load_pqos": {
         "function_output": {},
@@ -147,10 +145,6 @@ class TestPqos(unittest.TestCase):
     Unit tests for pqos module
     """
 
-    @classmethod
-    def setUpClass(cls):
-        logging.disable(logging.CRITICAL)
-
     @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib")
     def test_pqos_init(self, mock_find_executable):
         """
diff --git a/benchexec/test_result.py b/benchexec/test_result.py
index 157247b62..07c4956d3 100644
--- a/benchexec/test_result.py
+++ b/benchexec/test_result.py
@@ -5,8 +5,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import logging
-import sys
 import tempfile
 import unittest
 
@@ -18,8 +16,6 @@
     _SCORE_WRONG_FALSE,
 )
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 
 class TestExpectedResult(unittest.TestCase):
     def test_via_string(self):
@@ -56,10 +52,6 @@ def test(s):
 
 
 class TestResult(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        logging.disable(logging.CRITICAL)
 
     def expected_result(self, result, subcategory=None):
         return {"dummy.prp": ExpectedResult(result, subcategory)}
diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py
index 340164887..75cf1886a 100644
--- a/benchexec/test_runexecutor.py
+++ b/benchexec/test_runexecutor.py
@@ -10,7 +10,6 @@
 import os
 import re
 import subprocess
-import sys
 import tempfile
 import threading
 import time
@@ -25,8 +24,6 @@
 from benchexec import runexecutor
 from benchexec import util
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 here = os.path.dirname(__file__)
 base_dir = os.path.join(here, "..")
 bin_dir = os.path.join(base_dir, "bin")
@@ -38,9 +35,6 @@
 class TestRunExecutor(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
-        logging.disable(logging.NOTSET)  # need to make sure to get all messages
         if not hasattr(cls, "assertRegex"):
             cls.assertRegex = cls.assertRegexpMatches
 
diff --git a/benchexec/test_util.py b/benchexec/test_util.py
index 523d7161a..4aba17c42 100644
--- a/benchexec/test_util.py
+++ b/benchexec/test_util.py
@@ -6,7 +6,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from decimal import Decimal
-import sys
 import unittest
 from benchexec.util import ProcessExitCode
 import tempfile
@@ -15,14 +14,8 @@
 
 from benchexec import util
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 
 class TestParse(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
 
     def assertEqualNumberAndUnit(self, value, number, unit):
         self.assertEqual(util.split_number_and_unit(value), (number, unit))
@@ -103,10 +96,6 @@ def test_print_decimal_float(self):
 
 
 class TestProcessExitCode(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
 
     def ProcessExitCode_with_value(self, value):
         return ProcessExitCode(raw=value << 8, value=value, signal=None)
@@ -137,11 +126,6 @@ def test_signal(self):
 
 
 class TestRmtree(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.longMessage = True
-        cls.maxDiff = None
-
     def setUp(self):
         self.base_dir = tempfile.mkdtemp(prefix="BenchExec_test_util_rmtree")
 
diff --git a/benchexec/tools/test.py b/benchexec/tools/test.py
index 8581a4562..9c6d2e154 100644
--- a/benchexec/tools/test.py
+++ b/benchexec/tools/test.py
@@ -8,12 +8,9 @@
 import logging
 import os
 import unittest
-import sys
 
 import benchexec.model
 
-sys.dont_write_bytecode = True  # prevent creation of .pyc files
-
 here = os.path.dirname(__file__)
 
 
diff --git a/debian/control b/debian/control
index 798ccc835..73ddc6d2e 100644
--- a/debian/control
+++ b/debian/control
@@ -9,7 +9,7 @@ Build-Depends: debhelper-compat (= 12),
                python3-setuptools,
                python3-lxml,
                python3-yaml (>= 3.12),
-               python3-nose
+               python3-pytest
 Standards-Version: 3.9.6.1
 X-Python3-Version: >= 3.8
 Homepage: https://github.com/sosy-lab/benchexec
diff --git a/doc/DEVELOPMENT.md b/doc/DEVELOPMENT.md
index 776bd450d..35d27ee0f 100644
--- a/doc/DEVELOPMENT.md
+++ b/doc/DEVELOPMENT.md
@@ -51,10 +51,23 @@ Please format all code using `black .`.
 Apart from what is formatted automatically,
 we try to follow the official Python style guide [PEP8](https://www.python.org/dev/peps/pep-0008/).
 
-We also check our code using the static-analysis tool [flake8](http://flake8.pycqa.org).
+
+## Tests and CI
+
+To run the test suite of BenchExec, use the following command:
+
+    python3 -m pytest
+
+We also check our code using the static-analysis tools
+[flake8](http://flake8.pycqa.org) and [ruff](https://github.com/astral-sh/ruff/).
 If you find a rule that should not be enforced in your opinion,
 please raise an issue.
 
+As our main CI system we use GitLab, which runs all tests and checks,
+but only on branches of our repository (not on PRs from forks).
+GitHub Actions and AppVeyor additionally run a subset of the checks
+(mostly for the JavaScript part of BenchExec) on all PRs.
+
 
 ## Releasing a new Version
 
diff --git a/pyproject.toml b/pyproject.toml
index 5e4b78ba9..6be7e209c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,3 +70,7 @@ ignore = [
     # wildcard imports significantly shorten test code,
     'F405',
 ]
+
+[tool.pytest.ini_options]
+python_files = ["test_*.py", "test_integration/__init__.py", "test.py"]
+norecursedirs = ["contrib/p4/docker_files", "build"]
diff --git a/release.sh b/release.sh
index 5c887e886..a19cb7b09 100755
--- a/release.sh
+++ b/release.sh
@@ -80,13 +80,9 @@ python3 -m venv "$TEMP3"
 . "$TEMP3/bin/activate"
 git clone "file://$DIR" "$TEMP3/benchexec"
 pushd "$TEMP3/benchexec"
-# Avoid the wheel on PyPi for nose, it does not work on Python 3.10.
-# Local building from source works, but only with setuptools<58.
-pip install "setuptools < 58"
-pip install nose --no-binary :all:
 pip install build
 pip install -e ".[dev]"
-python -m nose
+python -m pytest
 python -m build
 popd
 deactivate
diff --git a/setup.cfg b/setup.cfg
index f48cfa62f..be3291295 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,12 +61,11 @@ license_files =
 packages = benchexec, benchexec.tablegenerator, benchexec.tools
 install_requires =
   PyYAML >= 3.12
-test_suite = nose.collector
 zip_safe = True
 
 [options.extras_require]
 dev =
-  nose >= 1.0
+  pytest
   lxml
 systemd =
   pystemd >= 0.7.0
@@ -83,8 +82,3 @@ benchexec.tablegenerator =
   react-table/build/*.min.js
   react-table/build/*.min.css
 
-[nosetests]
-# Necessary for nose since Python 3.8 to find the tests on Windows
-traverse-namespace=1
-# Necessary to find tests in non-package
-include=contrib
diff --git a/test/Dockerfile.python-3.10 b/test/Dockerfile.python-3.10
index 2f42b9225..825aa9e5e 100644
--- a/test/Dockerfile.python-3.10
+++ b/test/Dockerfile.python-3.10
@@ -29,10 +29,5 @@ RUN pip install \
   "coverage[toml] >= 5.0" \
   lxml \
   pystemd \
-  pyyaml \
-  'setuptools < 58'
-
-# Avoid the wheel on PyPi for nose, because it does not work on Python 3.10.
-# An installation from source does work, though, if setuptools<58 exists.
-# Cf. https://github.com/nose-devs/nose/issues/1099
-RUN pip install nose --no-binary :all:
+  pytest \
+  pyyaml
diff --git a/test/Dockerfile.python-3.11 b/test/Dockerfile.python-3.11
index 628e05b18..825b67d09 100644
--- a/test/Dockerfile.python-3.11
+++ b/test/Dockerfile.python-3.11
@@ -29,10 +29,5 @@ RUN pip install \
   "coverage[toml] >= 5.0" \
   lxml \
   pystemd \
-  pyyaml \
-  'setuptools < 58'
-
-# Avoid the wheel on PyPi for nose, because it does not work on Python 3.11.
-# An installation from source does work, though, if setuptools<58 exists.
-# Cf. https://github.com/nose-devs/nose/issues/1099
-RUN pip install nose --no-binary :all:
+  pytest \
+  pyyaml
diff --git a/test/Dockerfile.python-3.8 b/test/Dockerfile.python-3.8
index 379e7f39a..276b79774 100644
--- a/test/Dockerfile.python-3.8
+++ b/test/Dockerfile.python-3.8
@@ -28,6 +28,6 @@ RUN pip install \
   coloredlogs \
   "coverage[toml] >= 5.0" \
   lxml \
-  nose \
   pystemd \
+  pytest \
   pyyaml
diff --git a/test/Dockerfile.python-3.9 b/test/Dockerfile.python-3.9
index c2e7af0a6..ad464d9ff 100644
--- a/test/Dockerfile.python-3.9
+++ b/test/Dockerfile.python-3.9
@@ -28,6 +28,6 @@ RUN pip install \
   coloredlogs \
   "coverage[toml] >= 5.0" \
   lxml \
-  nose \
   pystemd \
+  pytest \
   pyyaml