diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 33d7d840..a8b414f3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,8 +2,7 @@ vmtest: script: - rm -rf .tox - git archive HEAD -o archive.tar.gz - - python3.8 -m venv venv - - venv/bin/pip install -r testing/requirements.txt - - venv/bin/python -m testing.vmcore test --core-directory /var/drgn-tools/vmcores + - tox -e runner --notest + - tox -e runner -- python -m testing.vmcore test --core-directory /var/drgn-tools/vmcores - mkdir -p tmp/overlays tmp/info - - PATH=/usr/local/bin:$PATH venv/bin/python -m testing.heavyvm.runner --image-dir /var/drgn-tools/images --vm-info-dir tmp/info --overlay-dir tmp/overlays --tarball archive.tar.gz + - tox -e runner -- python -m testing.heavyvm.runner --image-dir /var/drgn-tools/images --vm-info-dir tmp/info --overlay-dir tmp/overlays --tarball archive.tar.gz diff --git a/doc/api.rst b/doc/api.rst index 53b4da7b..3737f1b3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -112,3 +112,10 @@ drgn_tools.cpuinfo .. automodule:: drgn_tools.cpuinfo :members: + + +drgn_tools.cmdline +----------------------- + +.. automodule:: drgn_tools.cmdline + :members: diff --git a/drgn_tools/cmdline.py b/drgn_tools/cmdline.py new file mode 100755 index 00000000..913d7220 --- /dev/null +++ b/drgn_tools/cmdline.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023, Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +""" +Helpers for command line +""" +import argparse + +from drgn import Program + +from drgn_tools.corelens import CorelensModule + + +def get_cmdline(prog: Program) -> str: + """ + Returns the kernel command line + """ + str_cmdline = prog["saved_command_line"] + return str_cmdline.string_().decode("utf-8") + + +def show_cmdline(prog: Program) -> None: + """ + Prints the kernel command line + """ + str_cmdline = get_cmdline(prog) + print(str_cmdline) + + +class CmdLine(CorelensModule): + """Display the kernel command line""" + + name = "cmdline" + + def run(self, prog: Program, args: argparse.Namespace) -> None: + show_cmdline(prog) diff --git a/drgn_tools/mm.py b/drgn_tools/mm.py index a7aaf6f7..94417f84 100644 --- a/drgn_tools/mm.py +++ b/drgn_tools/mm.py @@ -4,6 +4,7 @@ Helpers for examining the memory management subsystem. """ import enum +import math from typing import List from typing import Tuple @@ -11,6 +12,7 @@ from drgn import FaultError from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.boot import pgtable_l5_enabled +from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.percpu import per_cpu_ptr from drgn.helpers.linux.slab import for_each_slab_cache @@ -127,11 +129,9 @@ class AddrKind(enum.Enum): """ @classmethod - def _ranges(cls, prog: drgn.Program) -> List[Tuple["AddrKind", int, int]]: - ranges = prog.cache.get("drgn_tools_AddrKind_ranges") - if ranges: - return ranges - + def _ranges_x86_64( + cls, prog: drgn.Program + ) -> List[Tuple["AddrKind", int, int]]: # See include/asm-generic/vmlinux-lds.h # and also Documentation/x86/x86_64/mm.{rst,txt} # Convenient link: @@ -234,6 +234,172 @@ def _ranges(cls, prog: drgn.Program) -> List[Tuple["AddrKind", int, int]]: 0xFFFFFFFFFEFFFFFF, ), ] + return ranges + + @classmethod + def _ranges_aarch64( + cls, prog: drgn.Program + ) -> 
List[Tuple["AddrKind", int, int]]: + # For canonical information on this, see documentation: + # https://www.kernel.org/doc/html/latest/arch/arm64/memory.html + # And more importantly, code: + # arch/arm64/include/asm/memory.h + # The docs neglect to describe how KASLR impacts things. + + MB = 1024 * 1024 + GB = 1024 * MB + + vmcoreinfo = dict( + line.split("=", 1) + for line in prog["VMCOREINFO"] + .string_() + .decode("utf-8") + .strip() + .split("\n") + ) + va_bits = int(vmcoreinfo["NUMBER(VA_BITS)"]) + if va_bits != 48: + raise NotImplementedError( + f"Drgn-tools does not (yet) support arm64 with {va_bits} bit VAs" + ) + + page_offset = (1 << 64) - (1 << va_bits) + modules_vaddr = (1 << 64) - (1 << (va_bits - 1)) + try: + # 3e35d303ab7d ("arm64: module: rework module VA range selection") + # changes the module virtual region to 2GiB. It also introduces the + # variable "module_direct_base", which we can use to detect it + prog.symbol("module_direct_base") + modules_vsize = 2 * GB + except LookupError: + modules_vsize = 128 * MB + modules_end = modules_vaddr + modules_vsize + + # vmemmap is at the end of the address space, except for a guard hole + # (whose size depends on the kernel version). Thankfully, Drgn already + # knows how to find it, so all we need to do is calculate the length. + # The length doesn't seem to vary based on kernel version. + # The computation is seen in arch/arm64/include/asm/memory.h, + # essentially we take the max length of the direct map, convert to + # pages, and multiply by the aligned #bytes per struct page. Direct map + # spans page_offset to modules_vaddr + vmemmap_start = prog["vmemmap"].value_() + page_order = int(math.log2(prog.type("struct page").size - 1)) + 1 + vmemmap_size = (modules_vaddr - page_offset) >> ( + prog["PAGE_SHIFT"].value_() - page_order + ) + + # For arm64, the kernel image mapping is actually within VMALLOC_START + # .. VMALLOC_END. In fact, VMALLOC_START = MODULES_END. 
So we need to be + # careful to split up the vmalloc region into a section before, and a + # section after the kernel image. + # + # What's worse, in 9ad7c6d5e75b ("arm64: mm: tidy up top of kernel VA + # space"), the top of the vmalloc space became VMEMMAP_START - 256MiB. + # Prior to that, it was defined as: (- PUD_SIZE - VMEMMAP_SIZE - 64 + # KiB)... Unfortunately, the commit that does this, makes no change in + # terms of symbols or variables! + # + # We can use two tricks to help resolve this problem. + # 1. The /proc/kcore implementation contains a handy list of memory + # ranges and their types. We can find the range which begins with + # VMALLOC_START, and read the size out of it to get the end. + # This is a nice, easy way to handle it, but it depends on having + # CONFIG_PROC_KCORE enabled, and the kernel must have finished + # initialization. Debugging partially initialized kernels should be + # possible, so we'd like a backup, even a less-than-perfect one. + # 2. If that doesn't work, we can fall back on using the vmemmap_start + # as the top of vmalloc. This is not strictly correct: there's a + # "fixmap" region in between as well as an IO range. However... it's + # the best we can do for this case. + vmalloc_end = vmemmap_start + try: + KCORE_VMALLOC = prog.constant("KCORE_VMALLOC") + for kcl in list_for_each_entry( + "struct kcore_list", prog["kclist_head"].address_of_(), "list" + ): + # In the code, VMALLOC_START is defined to MODULES_END + if kcl.type == KCORE_VMALLOC and kcl.addr == modules_end: + vmalloc_end = (kcl.addr + kcl.size).value_() + except LookupError: + pass + + return [ + (cls.USER, 0, (1 << va_bits) - 1), + (cls.DIRECT_MAP, page_offset, modules_vaddr), + (cls.MODULE, modules_vaddr, modules_end), + # In between the modules_end and _text, there's the KASLR + # offset. This is more vmalloc! 
+ (cls.VMALLOC, modules_end, prog.symbol("_text").address), + ( + cls.TEXT, + prog.symbol("_text").address, + prog.symbol("_etext").address, + ), + ( + cls.RODATA, + prog.symbol("__start_rodata").address, + prog.symbol("__end_rodata").address, + ), + ( + cls.INITTEXT, + prog.symbol("__inittext_begin").address, + prog.symbol("__inittext_end").address, + ), + ( + # TODO: should we have INITDATA too? + # NOTE: initdata begin .. end includes percpu, as well as some + # hypervisor percpu things, and relocation information. We're + # splitting initdata here to ensure we get it right. + cls.DATA, + prog.symbol("__initdata_begin").address, + prog.symbol("__per_cpu_start").address, + ), + ( + cls.PERCPU, + prog.symbol("__per_cpu_start").address, + prog.symbol("__per_cpu_end").address, + ), + ( + cls.DATA, + prog.symbol("__per_cpu_end").address, + prog.symbol("__initdata_end").address, + ), + ( + cls.DATA, + prog.symbol("_sdata").address, + prog.symbol("_edata").address, + ), + ( + cls.BSS, + prog.symbol("__bss_start").address, + prog.symbol("__bss_stop").address, + ), + ( + cls.VMALLOC, + prog.symbol("_end").address, + vmalloc_end, + ), + # There's some arch-specific junk between _text and _end which isn't + # fully covered by the ranges above for the kernel image. For the + # most part this shouldn't matter. 
+ (cls.VMEMMAP, vmemmap_start, vmemmap_start + vmemmap_size), + ] + + @classmethod + def _ranges(cls, prog: drgn.Program) -> List[Tuple["AddrKind", int, int]]: + ranges = prog.cache.get("drgn_tools_AddrKind_ranges") + if ranges: + return ranges + + if prog.platform.arch == drgn.Architecture.X86_64: + ranges = cls._ranges_x86_64(prog) + elif prog.platform.arch == drgn.Architecture.AARCH64: + ranges = cls._ranges_aarch64(prog) + else: + raise NotImplementedError( + f"AddrKind is not implemented for {prog.platform.arch}" + ) prog.cache["drgn_tools_AddrKind_ranges"] = ranges return ranges @@ -250,8 +416,6 @@ def categorize( :param prog: program we're debugging :param addr: address to categorize """ - if prog.platform.arch != drgn.Architecture.X86_64: - raise NotImplementedError("Only implemented for x86_64") addr = int(addr) for kind, start, end in cls._ranges(prog): @@ -281,10 +445,9 @@ def totalram_pages(prog: drgn.Program) -> drgn.Object: return prog["totalram_pages"] -def check_freelists_at_crashing_cpu(prog: drgn.Program) -> None: - crashing_cpu = prog["crashing_cpu"].value_() +def check_freelists_at_cpu(prog: drgn.Program, cpu: int) -> None: for slab_cache in for_each_slab_cache(prog): - cpu_slab = per_cpu_ptr(slab_cache.cpu_slab.read_(), crashing_cpu) + cpu_slab = per_cpu_ptr(slab_cache.cpu_slab.read_(), cpu) if cpu_slab.freelist.value_(): try: _ = prog.read(cpu_slab.freelist.value_(), 1) @@ -293,7 +456,7 @@ def check_freelists_at_crashing_cpu(prog: drgn.Program) -> None: slab_cache.name.string_(), escape_backslash=True ) print( - f"found freelist corruption in lockless freelist of slab-cache: {slab_cache_name} at crash cpu: {crashing_cpu}" + f"found freelist corruption in lockless freelist of slab-cache: {slab_cache_name} at cpu: {cpu}" ) return diff --git a/testing/vmcore.py b/testing/vmcore.py index 19212ce8..67d92752 100644 --- a/testing/vmcore.py +++ b/testing/vmcore.py @@ -4,6 +4,7 @@ Manager for test vmcores - downloaded from OCI block storage """ import 
argparse +import fnmatch import os import signal import subprocess @@ -237,9 +238,19 @@ def upload_all(client: ObjectStorageClient, core: str) -> None: future.result() -def test() -> None: +def test(vmcore_list: List[str]) -> None: + def should_run_vmcore(name: str) -> bool: + if not vmcore_list: + return True + for pat in vmcore_list: + if fnmatch.fnmatch(name, pat): + return True + return False + for path in CORE_DIR.iterdir(): core_name = path.name + if not should_run_vmcore(core_name): + continue with ci_section( f"vmcore-{core_name}", f"Running tests on vmcore {core_name}", @@ -292,6 +303,14 @@ def main(): action="store_true", help="delete any files which are not listed on block storage", ) + parser.add_argument( + "--vmcore", + action="append", + default=[], + help="only run tests on the given vmcore(s). you can use this " + "multiple times to specify multiple vmcore names. You can also " + "use fnmatch patterns to specify several cores at once.", + ) args = parser.parse_args() if args.core_directory: CORE_DIR = args.core_directory.absolute() @@ -305,7 +324,7 @@ def main(): sys.exit("error: --upload-core is required for upload operation") upload_all(get_client(), args.upload_core) elif args.action == "test": - test() + test(args.vmcore) if __name__ == "__main__": diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py new file mode 100644 index 00000000..80303569 --- /dev/null +++ b/tests/test_cmdline.py @@ -0,0 +1,8 @@ +# Copyright (c) 2023, Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from drgn_tools import cmdline + + +def test_cmdline(prog): + cmdline.get_cmdline(prog) + cmdline.show_cmdline(prog) diff --git a/tests/test_mm.py b/tests/test_mm.py index d8ea28b2..626cad8f 100644 --- a/tests/test_mm.py +++ b/tests/test_mm.py @@ -116,5 +116,9 @@ def test_AddrKind_categorize_module_percpu(prog: drgn.Program) -> None: @pytest.mark.skip_live -def test_check_freelists_at_crashing_cpu(prog: drgn.Program) -> None: - mm.check_freelists_at_crashing_cpu(prog) +def test_check_freelists_at_cpu(prog: drgn.Program) -> None: + if "crashing_cpu" in prog: + cpu = prog["crashing_cpu"].value_() + else: + cpu = prog["panic_cpu"].counter.value_() + mm.check_freelists_at_cpu(prog, cpu)