Skip to content

Commit

Permalink
Adding helper for lock contention detection
Browse files Browse the repository at this point in the history
  • Loading branch information
pssatapathy-oracle committed Nov 9, 2023
1 parent 22d287c commit c4432a1
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 41 deletions.
7 changes: 7 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,10 @@ drgn_tools.cmdline

.. automodule:: drgn_tools.cmdline
:members:


drgn_tools.lock
-----------------------

.. automodule:: drgn_tools.lock
:members:
43 changes: 2 additions & 41 deletions drgn_tools/ext4_dirlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@
import argparse

import drgn
from drgn import Object
from drgn import Program
from drgn.helpers.linux.fs import d_path
from drgn.helpers.linux.sched import task_state_to_char
Expand All @@ -97,51 +96,13 @@
from drgn_tools.corelens import CorelensModule
from drgn_tools.locking import for_each_mutex_waiter
from drgn_tools.locking import for_each_rwsem_waiter
from drgn_tools.locking import show_lock_waiter
from drgn_tools.locking import timestamp_str
from drgn_tools.module import ensure_debuginfo
from drgn_tools.task import task_lastrun2now
from drgn_tools.util import has_member


def timestamp_str(ns: int) -> str:
    """
    Format a nanosecond duration as ``days hours:minutes:seconds.millis``.

    :param ns: duration in nanoseconds
    :returns: string such as ``"1 01:01:01.001"``; anything below one
      millisecond is dropped by the floor division
    """
    total_ms = ns // 1000000
    total_secs, ms = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    total_hours, mins = divmod(total_mins, 60)
    days, hours = divmod(total_hours, 24)
    return "%d %02d:%02d:%02d.%03d" % (days, hours, mins, secs, ms)


def show_lock_waiter(
    prog: Program, task: Object, index: int, stacktrace: bool
) -> None:
    """
    Print a one-line summary of a task waiting on a lock.

    The line holds the waiter index, command name, PID, task state
    character and the formatted ``task_lastrun2now()`` value.

    :param prog: drgn program
    :param task: ``struct task_struct *``
    :param index: index of waiter
    :param stacktrace: true to dump stack trace of the waiter
    :returns: None
    """
    label = "[%d] " % index
    comm = task.comm.string_().decode()
    pid = task.pid.value_()
    state = task_state_to_char(task)
    waited = timestamp_str(task_lastrun2now(task))
    row = (label, comm, pid, state, waited)
    print("%12s: %-16s %-8d %-6s %-16s" % row)
    if stacktrace:
        bt(task)


def ext4_dirlock_scan(prog: drgn.Program, stacktrace: bool = False) -> None:
"""
Scan processes hung by ext4 directory inode lock
Expand Down
158 changes: 158 additions & 0 deletions drgn_tools/lock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright (c) 2023, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""
"corelens lock" should support:
1. Finding processes stuck on a mutex or semaphore.
2. Finding the contended mutex.
3. Listing all waiters on that lock and how long each has waited.
Target mutex API list for lock contention:
void mutex_lock(struct mutex *lock);
void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
int mutex_lock_interruptible_nested(struct mutex *lock,
unsigned int subclass);
int mutex_lock_interruptible(struct mutex *lock);
int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
TRY variants of mutex are ignored as they will not block.
The common function used in all these APIs is
__mutex_lock(), which is sufficient to catch all tasks blocked on mutexes.
For semaphores,
there are no owners, and waiters generally share the common functions
"__down_common" and "__down", depending upon the release. So trapping
these two functions is sufficient to find the semaphore waiters.
"""
import argparse
from typing import List

import drgn
from drgn import Program

from drgn_tools.bt import bt
from drgn_tools.bt import bt_has
from drgn_tools.corelens import CorelensModule
from drgn_tools.locking import for_each_mutex_waiter
from drgn_tools.locking import for_each_rwsem_waiter
from drgn_tools.locking import mtx_owner
from drgn_tools.locking import show_lock_waiter


def scan_mutex_lock(prog: Program, stk: bool) -> None:
    """
    Report every contended mutex found on a task's stack.

    Looks for tasks with ``__mutex_lock`` in their backtrace, takes the
    ``lock`` variable from that frame and, once per distinct mutex, prints
    the mutex, its owner and its waiters.

    :param prog: drgn program
    :param stk: true to also dump the stack of the owner and of each waiter
    :returns: None
    """
    frame_list = bt_has(prog, "__mutex_lock")
    if not frame_list:
        return

    # Addresses of the mutexes already reported during this scan.
    seen_mutexes: set = set()

    for _task, frame in frame_list:
        try:
            mtx = frame["lock"]
            mtx_addr = mtx.value_()
        except drgn.ObjectAbsentError:
            # The variable is not available in this frame; skip the task
            # (as show_sem_lock() does) rather than abort the whole scan.
            continue

        # Several tasks usually block on the same mutex: report it once.
        if mtx_addr in seen_mutexes:
            continue
        seen_mutexes.add(mtx_addr)

        # Resolve the owner only for mutexes that are actually reported.
        struct_owner = mtx_owner(prog, mtx)

        print("Mutex:", hex(mtx.owner.counter.address_of_().value_()))
        print("Mutex OWNER:", struct_owner.comm.string_().decode("utf-8"))
        print("")
        if stk:
            bt(struct_owner.pid)
        print("")
        print(
            "Mutex WAITERS (Index, cpu, comm, pid, state, wait time (d hr:min:sec:ms)):"
        )
        waiters = for_each_mutex_waiter(prog, mtx)
        for index, waiter in enumerate(waiters, start=1):
            show_lock_waiter(prog, waiter, index, stacktrace=stk)
        print("")


# Addresses of semaphores that have already been reported.  Kept at module
# level so that successive show_sem_lock() calls do not print the same
# semaphore twice.
sem_lock: List = []


def show_sem_lock(prog: Program, frame_list, stk: bool) -> None:
    """
    Print each not-yet-reported semaphore in ``frame_list`` and its waiters.

    :param prog: drgn program
    :param frame_list: iterable of ``(task, frame)`` pairs; each frame is
      expected to have a ``sem`` variable
    :param stk: true to dump the stack trace of each waiter
    :returns: None
    """
    for _task, frame in frame_list:
        try:
            sem = frame["sem"]
            semaddr = sem.value_()
        except drgn.ObjectAbsentError:
            # ``sem`` is not available in this frame; nothing to report.
            continue

        # Record each address exactly once and skip repeats.
        if semaddr in sem_lock:
            continue
        sem_lock.append(semaddr)

        print("Semaphore:", hex(semaddr))
        print(
            "Semaphore WAITERS (Index, cpu, comm, pid, state, wait time (d hr:min:sec:ms)):"
        )
        # NOTE(review): the waiters are walked with the rwsem helper although
        # ``sem`` comes from a __down*/semaphore frame -- confirm the helper
        # is valid for this lock type.
        waiters = for_each_rwsem_waiter(prog, sem)
        for index, waiter in enumerate(waiters, start=1):
            show_lock_waiter(prog, waiter, index, stacktrace=stk)

        print("")


def scan_sem_lock(prog: Program, stk: bool) -> None:
    """
    Report the semaphores that tasks are blocked on.

    Tasks are found by looking for ``__down`` and then ``__down_common`` in
    their backtraces; the matching frames are handed to ``show_sem_lock()``.

    :param prog: drgn program
    :param stk: true to dump the stack trace of each waiter
    :returns: None
    """
    # ``sem_lock`` is module-level state.  Reset it so that every scan starts
    # clean; otherwise a second scan in the same process reports nothing.
    sem_lock.clear()

    for func in ("__down", "__down_common"):
        frame_list = bt_has(prog, func)
        if frame_list:
            show_sem_lock(prog, frame_list, stk)


def scan_task(prog: Program, stk: bool) -> None:
    """
    Run the mutex scan followed by the semaphore scan.

    :param prog: drgn program
    :param stk: true to dump stack traces along with the lock details
    :returns: None
    """
    stages = (
        ("Scanning Mutexes ...", scan_mutex_lock),
        ("Scanning Semaphores...", scan_sem_lock),
    )
    for banner, scanner in stages:
        print(banner)
        print("")
        scanner(prog, stk)


class Locking(CorelensModule):
    """Display active mutexes and semaphores and their waiters"""

    # Name under which the module is registered.
    name = "lock"
    need_dwarf = True

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """Register the ``--stack`` flag on ``parser``."""
        parser.add_argument(
            "--stack",
            help="Print the stack.",
            action="store_true",
        )

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        """Scan ``prog`` for lock waiters, with stacks if ``--stack`` is set."""
        dump_stacks = args.stack
        scan_task(prog, dump_stacks)
60 changes: 60 additions & 0 deletions drgn_tools/locking.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,69 @@
"""
from typing import Iterable

import drgn
from drgn import Object
from drgn import Program
from drgn.helpers.linux.list import list_for_each_entry
from drgn.helpers.linux.sched import task_state_to_char

from drgn_tools.bt import bt
from drgn_tools.task import task_lastrun2now

MUTEX_FLAGS = 0x7


def mtx_owner(prog: Program, mtx: drgn.Object) -> drgn.Object:
    """
    Return the task recorded as owner of a mutex.

    ``mtx.owner.counter`` is read as an integer, the low ``MUTEX_FLAGS``
    bits are cleared, and the remaining value is used as the address of the
    owning task.

    :param prog: drgn program
    :param mtx: mutex object with an ``owner.counter`` member
    :returns: ``struct task_struct *`` built from the masked counter value
      (a NULL pointer when the counter is 0)
    """
    owner = mtx.owner.counter.value_()
    if owner < 0:
        # The counter is read as a signed value.  Reinterpret it as an
        # unsigned 64-bit address: two's complement means adding exactly
        # 2**64.  (Subtracting a further 1 would borrow into the address
        # bits whenever the flag bits are clear.)
        owner += 2**64
    owner_addr = owner & ~MUTEX_FLAGS
    return Object(prog, "struct task_struct *", value=owner_addr)


def timestamp_str(ns: int) -> str:
    """
    Format a nanosecond duration as ``days hours:minutes:seconds.millis``.

    :param ns: duration in nanoseconds
    :returns: string such as ``"1 01:01:01.001"``; anything below one
      millisecond is dropped by the floor division
    """
    remaining, ms = divmod(ns // 1000000, 1000)
    remaining, secs = divmod(remaining, 60)
    remaining, mins = divmod(remaining, 60)
    days, hours = divmod(remaining, 24)
    return "%d %02d:%02d:%02d.%03d" % (days, hours, mins, secs, ms)


def show_lock_waiter(
    prog: Program, task: Object, index: int, stacktrace: bool
) -> None:
    """
    Print a one-line summary of a task waiting on a lock.

    The line holds the waiter index, CPU number, command name, PID, task
    state character and the formatted ``task_lastrun2now()`` value.

    :param prog: drgn program
    :param task: ``struct task_struct *``
    :param index: index of waiter
    :param stacktrace: true to dump stack trace of the waiter
    :returns: None
    """
    # Function-scope import keeps this block self-contained.  The helper is
    # used instead of reading ``task.cpu`` directly because that member is
    # not part of ``struct task_struct`` in every kernel layout.
    from drgn.helpers.linux.sched import task_cpu

    prefix = "[%d] " % index
    ncpu = task_cpu(task)
    print(
        "%12s: %-4s %-4d %-16s %-8d %-6s %-16s"
        % (
            prefix,
            "cpu:",
            ncpu,
            task.comm.string_().decode(),
            task.pid.value_(),
            task_state_to_char(task),
            timestamp_str(task_lastrun2now(task)),
        )
    )
    if stacktrace:
        # Blank line separates the summary from the stack dump.
        print("")
        bt(task)


def for_each_rwsem_waiter(prog: Program, rwsem: Object) -> Iterable[Object]:
Expand Down
7 changes: 7 additions & 0 deletions tests/test_lock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (c) 2023, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from drgn_tools import lock


def test_locks(prog):
    """Smoke test: the lock scan runs to completion with stack dumps on."""
    # ``stk`` is declared as a bool, so pass a real bool rather than ``1``.
    lock.scan_task(prog, True)

0 comments on commit c4432a1

Please sign in to comment.