Corelens module: dentrycache #26

Merged: 7 commits merged on Nov 18, 2023
Changes from all commits
196 changes: 161 additions & 35 deletions drgn_tools/dentry.py
@@ -3,6 +3,7 @@
"""
Helpers for dentries.
"""
import argparse
import stat
from typing import Iterable
from typing import Iterator
@@ -11,11 +12,74 @@
import drgn
from drgn import Object
from drgn import Program
from drgn.helpers.linux.fs import dentry_path
from drgn.helpers.linux.list import list_for_each_entry

from drgn_tools.corelens import CorelensModule
from drgn_tools.itertools import count
from drgn_tools.itertools import take
from drgn_tools.table import print_table
from drgn_tools.table import print_row
from drgn_tools.util import kernel_version


MNT_INTERNAL = 0x4000


def sb_first_mount_point(sb: Object) -> Optional[Object]:
"""
Return the first mountpoint of the superblock

A single filesystem instance can be mounted at several locations, so the
super_block has a list of instances. When iterating over the dentry cache,
we want the full path and don't care too much about _which_ path we get. We
just want to have a valid filesystem path. So return any arbitrary mount
point, the first one in the list. If the list is empty (unlikely except
during an unmount race) or if we are at the root filesystem, return None.

:param sb: ``struct super_block *``
:returns: ``struct dentry *`` or None
"""
for mount in list_for_each_entry(
"struct mount", sb.s_mounts.address_of_(), "mnt_instance"
):
mnt_parent = mount.mnt_parent.read_()
if mount.mnt.mnt_flags & MNT_INTERNAL:
continue
if mnt_parent == mount:
return None
return mount.mnt_mountpoint.read_()
return None


def dentry_path_any_mount(dentry: Object) -> bytes:
"""
Like dentry_path(), but don't require a path/mount. Just pick one
arbitrarily

:param dentry: ``struct dentry *``
"""
dentry = dentry.read_()
d_op = dentry.d_op.read_()
if d_op and d_op.d_dname:
return b"[" + dentry.d_inode.i_sb.s_type.name.string_() + b"]"

components = []
while True:
# reading dentry_val allows us to get all the fields of dentry at once
dentry_val = dentry[0].read_()
if dentry_val.d_parent == dentry:
dentry = sb_first_mount_point(dentry_val.d_sb)
if not dentry:
break
else:
continue
d_parent = dentry_val.d_parent
components.append(dentry_val.d_name.name.string_())
components.append(b"/")
dentry = d_parent
if components:
return b"".join(reversed(components))
else:
return b"/"


def for_each_dentry_in_hashtable(prog: Program) -> Iterator[Object]:
@@ -159,32 +223,36 @@ def print_dentry_table(
:param dentries: Any iterable of ``struct dentry *``
"""
if refcount:
dentry_table = [
["DENTRY", "SUPER_BLOCK", "INODE", "REFCOUNT", "TYPE", "PATH"]
]
col_widths = [16] * 3 + [5, 4, 0]
print_row(
["DENTRY", "SUPER_BLOCK", "INODE", "REFCOUNT", "TYPE", "PATH"],
col_widths,
)
else:
dentry_table = [["DENTRY", "SUPER_BLOCK", "INODE", "TYPE", "PATH"]]
col_widths = [16] * 3 + [4, 0]
print_row(
["DENTRY", "SUPER_BLOCK", "INODE", "TYPE", "PATH"], col_widths
)
for d in dentries:
file_type = __file_type(int(d.d_inode.i_mode)) if d.d_inode else "NONE"
if refcount:
dentry_stats = [
hex(d.value_()),
hex(d.d_sb.value_()),
hex(d.d_inode.value_()),
int(d_count(d)),
row = [
f"{d.value_():016x}",
f"{d.d_sb.value_():016x}",
f"{d.d_inode.value_():016x}",
str(int(d_count(d))),
file_type,
dentry_path(d).decode(),
dentry_path_any_mount(d).decode(),
]
else:
dentry_stats = [
hex(d.value_()),
hex(d.d_sb.value_()),
hex(d.d_inode.value_()),
row = [
f"{d.value_():016x}",
f"{d.d_sb.value_():016x}",
f"{d.d_inode.value_():016x}",
file_type,
dentry_path(d).decode(),
dentry_path_any_mount(d).decode(),
]
dentry_table.append(dentry_stats)
print_table(dentry_table)
print_row(row, col_widths)


def dentry_is_used(dentry: Object) -> bool:
@@ -227,23 +295,37 @@ def d_count(dentry: Object) -> int:
return dentry.d_lockref.count


def __dentry_iter(prog: Program) -> Iterator[Object]:
def __dentry_iter(prog: Program, chunk_size: int = 2048) -> Iterator[Object]:
"""Iterate through the hashtable"""
dentry_hashtable = prog["dentry_hashtable"]
# for uek5 and newer
dentry_hashtable_size = 2 ** (32 - int(prog["d_hash_shift"].read_()))
# for uek4
if not prog.symbols("in_lookup_hashtable"):
dentry_hashtable_size = 2 ** (int(prog["d_hash_shift"].read_()))

# iterate through the hashtable bucket by bucket
for i in range(dentry_hashtable_size):
bucket = dentry_hashtable[i]
d_hash = bucket.first
while d_hash:
dentry = drgn.container_of(d_hash, "struct dentry", "d_hash")
yield dentry
d_hash = d_hash.next
dentry_hashtable = prog["dentry_hashtable"].read_()

# Commit 854d3e63438d ("dcache: subtract d_hash_shift from 32 in advance")
# changes the logical meaning of d_hash_shift with absolutely no detectable
# change to any type or symbol. It was first included in 4.16 and has never
# been backported to any stable kernel release or UEK. There is simply no
# other way to know how to interpret d_hash_shift, except by using the
kernel version. Thankfully, it's just a simple comparison against 4.16.
if kernel_version(prog) < (4, 16, 0):
dentry_hashtable_size = 2 ** (int(prog["d_hash_shift"]))
else:
dentry_hashtable_size = 2 ** (32 - int(prog["d_hash_shift"]))

if dentry_hashtable_size % chunk_size != 0:
raise ValueError("chunk size is too big")

# iterate through the hashtable chunk by chunk
chunk_type = prog.array_type(dentry_hashtable[0].type_, chunk_size)
for chunk_start in range(0, dentry_hashtable_size, chunk_size):
array_chunk = drgn.Object(
prog, chunk_type, address=dentry_hashtable + chunk_start
).read_()
for i in range(chunk_size):
bucket = array_chunk[i]
d_hash = bucket.first
while d_hash:
dentry = drgn.container_of(d_hash, "struct dentry", "d_hash")
yield dentry
d_hash = d_hash.next.read_()


def __file_type(mode: Object) -> str:
@@ -270,3 +352,47 @@ def __file_type(mode: Object) -> str:
return "WHT"

return "UNKN"


class DentryCache(CorelensModule):
"""List dentries from the dentry hash table"""

name = "dentrycache"

def add_args(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--limit",
"-l",
type=int,
default=10000,
help="list at most <number> dentries, 10000 by default",
)
parser.add_argument(
"--negative",
"-n",
action="store_true",
help="list negative dentries only, disabled by default",
)
parser.add_argument(
"--detailed",
"-d",
action="store_true",
help="include inode, super, file type, refcount",
)

def run(self, prog: Program, args: argparse.Namespace) -> None:
if args.negative:
dentries = for_each_negative_dentry_in_hashtable(prog)
else:
dentries = for_each_dentry_in_hashtable(prog)

if args.limit:
dentries = take(args.limit, dentries)

if args.detailed:
print_dentry_table(dentries)
else:
# Emulate oled dentrycache
for i, dentry in enumerate(dentries):
path = dentry_path_any_mount(dentry).decode()
print(f"{i:05d} {path}")
21 changes: 21 additions & 0 deletions drgn_tools/table.py
@@ -7,6 +7,27 @@
from typing import Optional


def print_row(fields: List[Any], col_widths: List[int]):
"""
Print a single row of a table, given pre-determined column widths

Note that this doesn't guarantee that the width of every field in the row
will be less than the width of the column: in that case, the field's full
contents will be printed and columns will be misaligned. For guaranteed
aligned columns, see print_table(), or be very careful about your column
widths.

:param fields: a list of fields to print
:param col_widths: the width of each field (not including spaces)
"""
print(
" ".join(
str(val) if w <= 0 else str(val).ljust(w)
for val, w in zip(fields, col_widths)
)
)
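
For illustration, a small hedged sketch of the caller's contract, using widths like those chosen by print_dentry_table above (a width of 0 means no padding; the values are made up):

    # Header row followed by one data row; fields wider than their column are
    # printed in full, which simply misaligns later columns.
    print_row(["DENTRY", "TYPE", "PATH"], [16, 4, 0])
    print_row([f"{0xffff888012345678:016x}", "REG", "/var/log/messages"], [16, 4, 0])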


def print_table(
fields: List[List[Any]],
outfile: Optional[str] = None,
37 changes: 37 additions & 0 deletions drgn_tools/util.py
@@ -1,5 +1,6 @@
# Copyright (c) 2023, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import re
import sys
import time
import typing as t
@@ -46,6 +47,42 @@ def uts_to_string(s):
return uts


def kernel_version(prog: Program) -> t.Tuple[int, int, int]:
"""
Returns the kernel version as a tuple (major, minor, patch)

This is not the full release string, and it shouldn't be confused with the
UEK-specific parsing that is present in
:class:`drgn_tools.debuginfo.KernelVersion`. It simply corresponds to the
upstream major, minor, and patch versions, which typically (but not always)
remain constant over a distribution kernel's releases.

Given a kernel version, especially the major.minor version alone, there is
no guarantee about whether a commit is necessarily present or not. The
linux-stable process regularly backports commits from newer releases into
older ones, especially when they have a Fixes tag. Distributions like UEK
also backport certain changes, regardless of whether they were included in
stable releases.

This should be used only as a last resort for helper compatibility. At each
usage of this function, a comment should be in place describing (a) the
exact git commit SHA which introduces the change, and which kernel version
the change appears in, (b) why the change in behavior could not be handled by
detecting changes to variables or types, and (c) whether there is a risk
that stable/distro kernels may have a backport of the commit, which
couldn't be detected via a simple kernel version comparison.
"""
release = prog["UTS_RELEASE"].string_().decode("utf-8")
# Accepts 5.15.0, 6.0, 4.1.3-whatever...
match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", release)
if not match:
raise ValueError(f"Cannot understand kernel release: {release}")
maj, min, patch = match.groups()
if not patch:
patch = "0"
return (int(maj), int(min), int(patch))
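
A minimal sketch of the intended call pattern (the same version gate used in __dentry_iter in dentry.py, with the required justification kept in a comment at the call site):

    # Commit 854d3e63438d changed the meaning of d_hash_shift in v4.16 with no
    # detectable change to any type or symbol, so fall back to a version check.
    if kernel_version(prog) < (4, 16, 0):
        table_size = 2 ** int(prog["d_hash_shift"])
    else:
        table_size = 2 ** (32 - int(prog["d_hash_shift"]))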


def has_member(obj: Object, name: str) -> bool:
"""
Return true if a given object has a member with the given name.