From b52a6f5dcd9e598d74c4afedc9abd7ca1b82fb8a Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 3 Nov 2023 15:48:01 -0700 Subject: [PATCH 1/6] testing: vmcore: Allow filtering vmcores By using "--vmcore", you can provide one or more fnmatch patterns to select which cores to test. Signed-off-by: Stephen Brennan --- testing/vmcore.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/testing/vmcore.py b/testing/vmcore.py index 19212ce8..67d92752 100644 --- a/testing/vmcore.py +++ b/testing/vmcore.py @@ -4,6 +4,7 @@ Manager for test vmcores - downloaded from OCI block storage """ import argparse +import fnmatch import os import signal import subprocess @@ -237,9 +238,19 @@ def upload_all(client: ObjectStorageClient, core: str) -> None: future.result() -def test() -> None: +def test(vmcore_list: List[str]) -> None: + def should_run_vmcore(name: str) -> bool: + if not vmcore_list: + return True + for pat in vmcore_list: + if fnmatch.fnmatch(name, pat): + return True + return False + for path in CORE_DIR.iterdir(): core_name = path.name + if not should_run_vmcore(core_name): + continue with ci_section( f"vmcore-{core_name}", f"Running tests on vmcore {core_name}", @@ -292,6 +303,14 @@ def main(): action="store_true", help="delete any files which are not listed on block storage", ) + parser.add_argument( + "--vmcore", + action="append", + default=[], + help="only run tests on the given vmcore(s). you can use this " + "multiple times to specify multiple vmcore names. 
You can also " + "use fnmatch patterns to specify several cores at once.", + ) args = parser.parse_args() if args.core_directory: CORE_DIR = args.core_directory.absolute() @@ -305,7 +324,7 @@ def main(): sys.exit("error: --upload-core is required for upload operation") upload_all(get_client(), args.upload_core) elif args.action == "test": - test() + test(args.vmcore) if __name__ == "__main__": From a02ecf4c51ce9105354a0a411e4db0433df39803 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 6 Nov 2023 15:07:17 -0800 Subject: [PATCH 2/6] mm: Fully support aarch64 in AddrKind One of the very useful functions of drgn-tools is the address categorization capability, which should be capable of determining whether an arbitrary memory address falls into categories like userspace, kernel text, kernel data (ro, bss, etc), kernel module, vmalloc, or vmemmap. This is highly architecture-specific, and configuration-specific, since kernels don't typically export this information in an easy format. We try to implement it in a way that should apply to other common configurations, but UEK is the main target here. Add support for the aarch64 architecture. This solves a great deal of the existing test failures on aarch64. Signed-off-by: Stephen Brennan --- drgn_tools/mm.py | 178 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 171 insertions(+), 7 deletions(-) diff --git a/drgn_tools/mm.py b/drgn_tools/mm.py index a7aaf6f7..b4023c0e 100644 --- a/drgn_tools/mm.py +++ b/drgn_tools/mm.py @@ -4,6 +4,7 @@ Helpers for examining the memory management subsystem. 
""" import enum +import math from typing import List from typing import Tuple @@ -11,6 +12,7 @@ from drgn import FaultError from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.boot import pgtable_l5_enabled +from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.percpu import per_cpu_ptr from drgn.helpers.linux.slab import for_each_slab_cache @@ -127,11 +129,9 @@ class AddrKind(enum.Enum): """ @classmethod - def _ranges(cls, prog: drgn.Program) -> List[Tuple["AddrKind", int, int]]: - ranges = prog.cache.get("drgn_tools_AddrKind_ranges") - if ranges: - return ranges - + def _ranges_x86_64( + cls, prog: drgn.Program + ) -> List[Tuple["AddrKind", int, int]]: # See include/asm-generic/vmlinux-lds.h # and also Documentation/x86/x86_64/mm.{rst,txt} # Convenient link: @@ -234,6 +234,172 @@ def _ranges(cls, prog: drgn.Program) -> List[Tuple["AddrKind", int, int]]: 0xFFFFFFFFFEFFFFFF, ), ] + return ranges + + @classmethod + def _ranges_aarch64( + cls, prog: drgn.Program + ) -> List[Tuple["AddrKind", int, int]]: + # For canonical information on this, see documentation: + # https://www.kernel.org/doc/html/latest/arch/arm64/memory.html + # And more importantly, code: + # arch/arm64/include/asm/memory.h + # The docs neglect to describe how KASLR impacts things. + + MB = 1024 * 1024 + GB = 1024 * MB + + vmcoreinfo = dict( + line.split("=", 1) + for line in prog["VMCOREINFO"] + .string_() + .decode("utf-8") + .strip() + .split("\n") + ) + va_bits = int(vmcoreinfo["NUMBER(VA_BITS)"]) + if va_bits != 48: + raise NotImplementedError( + "Drgn-tools does not (yet) support arm64 with {va_bit} bit VAs" + ) + + page_offset = (1 << 64) - (1 << va_bits) + modules_vaddr = (1 << 64) - (1 << (va_bits - 1)) + try: + # 3e35d303ab7d ("arm64: module: rework module VA range selection") + # changes the module virtual region to 2GiB. 
It also introduces the + # variable "module_direct_base", which we can use to detect it + prog.symbol("module_direct_base") + modules_vsize = 2 * GB + except LookupError: + modules_vsize = 128 * MB + modules_end = modules_vaddr + modules_vsize + + # vmemmap is at the end of the address space, except for a guard hole + # (whose size depends on the kernel version). Thankfully, Drgn already + # knows how to find it, so all we need to do is calculate the length. + # The length doesn't seem to vary based on kernel version. + # The computation is seen in arch/arm64/include/asm/memory.h, + # essentially we take the max length of the direct map, convert to + # pages, and multiply by the aligned #bytes per struct page. Direct map + # spans page_offset to modules_vaddr + vmemmap_start = prog["vmemmap"].value_() + page_order = int(math.log2(prog.type("struct page").size - 1)) + 1 + vmemmap_size = (modules_vaddr - page_offset) >> ( + prog["PAGE_SHIFT"].value_() - page_order + ) + + # For arm64, the kernel image mapping is actually within VMALLOC_START + # .. VMALLOC_END. In fact, VMALLOC_START = MODULES_END. So we need to be + # careful to split up the vmalloc region into a section before, and a + # section after the kernel image. + # + # What's worse, in 9ad7c6d5e75b ("arm64: mm: tidy up top of kernel VA + # space"), the top of the vmalloc space became VMEMMAP_START - 256MiB. + # Prior to that, it was defined as: (- PUD_SIZE - VMEMMAP_SIZE - 64 + # KiB)... Unfortunately, the commit that does this, makes no change in + # terms of symbols or variables! + # + # We can use two tricks to help resolve this problem. + # 1. The /proc/kcore implementation contains a handy list of memory + # ranges and their types. We can find the range which begins with + # VMALLOC_START, and read the size out of it to get the end. + # This is a nice, easy way to handle it, but it depends on having + # CONFIG_PROC_KCORE enabled, and the kernel must have finished + # initialization. 
Debugging partially initialized kernels should be + # possible, so we'd like a backup, even a less-than-perfect one. + # 2. If that doesn't work, we can fall back on using the vmemmap_start + # as the top of vmalloc. This is not strictly correct: there's a + # "fixmap" region in between as well as an IO range. However... it's + # the best we can do for this case. + vmalloc_end = vmemmap_start + try: + KCORE_VMALLOC = prog.constant("KCORE_VMALLOC") + for kcl in list_for_each_entry( + "struct kcore_list", prog["kclist_head"].address_of_(), "list" + ): + # In the code, VMALLOC_START is defined to MODULES_END + if kcl.type == KCORE_VMALLOC and kcl.addr == modules_end: + vmalloc_end = (kcl.addr + kcl.size).value_() + except LookupError: + pass + + return [ + (cls.USER, 0, (1 << va_bits) - 1), + (cls.DIRECT_MAP, page_offset, modules_vaddr), + (cls.MODULE, modules_vaddr, modules_end), + # In between the modules_end and _text, there's the KASLR + # offset. This is more vmalloc! + (cls.VMALLOC, modules_end, prog.symbol("_text").address), + ( + cls.TEXT, + prog.symbol("_text").address, + prog.symbol("_etext").address, + ), + ( + cls.RODATA, + prog.symbol("__start_rodata").address, + prog.symbol("__end_rodata").address, + ), + ( + cls.INITTEXT, + prog.symbol("__inittext_begin").address, + prog.symbol("__inittext_end").address, + ), + ( + # TODO: should we have INITDATA too? + # NOTE: initdata begin .. end includes percpu, as well as some + # hypervisor percpu things, and relocation information. We're + # splitting initdata here to ensure we get it right. 
+ cls.DATA, + prog.symbol("__initdata_begin").address, + prog.symbol("__per_cpu_start").address, + ), + ( + cls.PERCPU, + prog.symbol("__per_cpu_start").address, + prog.symbol("__per_cpu_end").address, + ), + ( + cls.DATA, + prog.symbol("__per_cpu_end").address, + prog.symbol("__initdata_end").address, + ), + ( + cls.DATA, + prog.symbol("_sdata").address, + prog.symbol("_edata").address, + ), + ( + cls.BSS, + prog.symbol("__bss_start").address, + prog.symbol("__bss_stop").address, + ), + ( + cls.VMALLOC, + prog.symbol("_end").address, + vmalloc_end, + ), + # There's some arch-specific junk between _text and _end which isn't + # fully covered by the ranges above for the kernel image. For the + # most part this shouldn't matter. + (cls.VMEMMAP, vmemmap_start, vmemmap_start + vmemmap_size), + ] + + @classmethod + def _ranges(cls, prog: drgn.Program) -> List[Tuple["AddrKind", int, int]]: + ranges = prog.cache.get("drgn_tools_AddrKind_ranges") + if ranges: + return ranges + + if prog.platform.arch == drgn.Architecture.X86_64: + ranges = cls._ranges_x86_64(prog) + elif prog.platform.arch == drgn.Architecture.AARCH64: + ranges = cls._ranges_aarch64(prog) + else: + raise NotImplementedError( + f"AddrKind is not implemented for {prog.platform.arch}" + ) prog.cache["drgn_tools_AddrKind_ranges"] = ranges return ranges @@ -250,8 +416,6 @@ def categorize( :param prog: program we're debugging :param addr: address to categorize """ - if prog.platform.arch != drgn.Architecture.X86_64: - raise NotImplementedError("Only implemented for x86_64") addr = int(addr) for kind, start, end in cls._ranges(prog): From 1861cc4f254bcc124635eb3a32ec4c60909cc9d0 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 6 Nov 2023 15:11:47 -0800 Subject: [PATCH 3/6] mm: use panic_cpu, not crashing_cpu The former is generic, the latter is arch-specific. To handle this, let's make the check_freelists_at_crashing_cpu() function more general, and then pick the right CPU number in the test. 
This will pass on x86_64 and aarch64. Signed-off-by: Stephen Brennan --- drgn_tools/mm.py | 7 +++---- tests/test_mm.py | 8 ++++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drgn_tools/mm.py b/drgn_tools/mm.py index b4023c0e..94417f84 100644 --- a/drgn_tools/mm.py +++ b/drgn_tools/mm.py @@ -445,10 +445,9 @@ def totalram_pages(prog: drgn.Program) -> drgn.Object: return prog["totalram_pages"] -def check_freelists_at_crashing_cpu(prog: drgn.Program) -> None: - crashing_cpu = prog["crashing_cpu"].value_() +def check_freelists_at_cpu(prog: drgn.Program, cpu: int) -> None: for slab_cache in for_each_slab_cache(prog): - cpu_slab = per_cpu_ptr(slab_cache.cpu_slab.read_(), crashing_cpu) + cpu_slab = per_cpu_ptr(slab_cache.cpu_slab.read_(), cpu) if cpu_slab.freelist.value_(): try: _ = prog.read(cpu_slab.freelist.value_(), 1) @@ -457,7 +456,7 @@ def check_freelists_at_crashing_cpu(prog: drgn.Program) -> None: slab_cache.name.string_(), escape_backslash=True ) print( - f"found freelist corruption in lockless freelist of slab-cache: {slab_cache_name} at crash cpu: {crashing_cpu}" + f"found freelist corruption in lockless freelist of slab-cache: {slab_cache_name} at cpu: {cpu}" ) return diff --git a/tests/test_mm.py b/tests/test_mm.py index d8ea28b2..626cad8f 100644 --- a/tests/test_mm.py +++ b/tests/test_mm.py @@ -116,5 +116,9 @@ def test_AddrKind_categorize_module_percpu(prog: drgn.Program) -> None: @pytest.mark.skip_live -def test_check_freelists_at_crashing_cpu(prog: drgn.Program) -> None: - mm.check_freelists_at_crashing_cpu(prog) +def test_check_freelists_at_cpu(prog: drgn.Program) -> None: + if "crashing_cpu" in prog: + cpu = prog["crashing_cpu"].value_() + else: + cpu = prog["panic_cpu"].counter.value_() + mm.check_freelists_at_cpu(prog, cpu) From c9ab69874f41f348897def7f5b7bac2dfff869e9 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 6 Nov 2023 17:01:18 -0800 Subject: [PATCH 4/6] gitlab ci: Use tox -e runner to initialize runner 
Signed-off-by: Stephen Brennan --- .gitlab-ci.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 33d7d840..a8b414f3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,8 +2,7 @@ vmtest: script: - rm -rf .tox - git archive HEAD -o archive.tar.gz - - python3.8 -m venv venv - - venv/bin/pip install -r testing/requirements.txt - - venv/bin/python -m testing.vmcore test --core-directory /var/drgn-tools/vmcores + - tox -e runner --notest + - tox -e runner -- python -m testing.vmcore test --core-directory /var/drgn-tools/vmcores - mkdir -p tmp/overlays tmp/info - - PATH=/usr/local/bin:$PATH venv/bin/python -m testing.heavyvm.runner --image-dir /var/drgn-tools/images --vm-info-dir tmp/info --overlay-dir tmp/overlays --tarball archive.tar.gz + - tox -e runner -- python -m testing.heavyvm.runner --image-dir /var/drgn-tools/images --vm-info-dir tmp/info --overlay-dir tmp/overlays --tarball archive.tar.gz From d84f55bb38dc0ca1fa2f0ff381a2892dc1f56627 Mon Sep 17 00:00:00 2001 From: Partha Sarathi Satapathy Date: Fri, 3 Nov 2023 10:19:31 +0000 Subject: [PATCH 5/6] Helper for cmdline --- doc/api.rst | 7 ++++++- drgn_tools/cmdline.py | 35 +++++++++++++++++++++++++++++++++++ tests/test_cmdline.py | 8 ++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100755 drgn_tools/cmdline.py create mode 100644 tests/test_cmdline.py diff --git a/doc/api.rst b/doc/api.rst index 53b4da7b..9c69920f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -106,9 +106,14 @@ drgn_tools.rds .. automodule:: drgn_tools.rds :members: - drgn_tools.cpuinfo ----------------------- .. automodule:: drgn_tools.cpuinfo :members: + +drgn_tools.cmdline +----------------------- + +.. automodule:: drgn_tools.cmdline + :members: diff --git a/drgn_tools/cmdline.py b/drgn_tools/cmdline.py new file mode 100755 index 00000000..913d7220 --- /dev/null +++ b/drgn_tools/cmdline.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023, Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +""" +Helpers for command line +""" +import argparse + +from drgn import Program + +from drgn_tools.corelens import CorelensModule + + +def get_cmdline(prog: Program) -> str: + """ + Returns the kernel command line + """ + str_cmdline = prog["saved_command_line"] + return str_cmdline.string_().decode("utf-8") + + +def show_cmdline(prog: Program) -> None: + """ + Prints the kernel command line + """ + str_cmdline = get_cmdline(prog) + print(str_cmdline) + + +class CmdLine(CorelensModule): + """Display the kernel command line""" + + name = "cmdline" + + def run(self, prog: Program, args: argparse.Namespace) -> None: + show_cmdline(prog) diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py new file mode 100644 index 00000000..80303569 --- /dev/null +++ b/tests/test_cmdline.py @@ -0,0 +1,8 @@ +# Copyright (c) 2023, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from drgn_tools import cmdline + + +def test_cmdline(prog): + cmdline.get_cmdline(prog) + cmdline.show_cmdline(prog) From 22d287cf8299bba84f6dcf6bf24f9bb53655e3b0 Mon Sep 17 00:00:00 2001 From: Partha Sarathi Satapathy Date: Wed, 8 Nov 2023 05:18:27 +0000 Subject: [PATCH 6/6] Merge conflicts --- doc/api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 9c69920f..3737f1b3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -106,12 +106,14 @@ drgn_tools.rds .. automodule:: drgn_tools.rds :members: + drgn_tools.cpuinfo ----------------------- .. automodule:: drgn_tools.cpuinfo :members: + drgn_tools.cmdline -----------------------