Skip to content

Commit

Permalink
numastat: Add numastat corelens module
Browse files Browse the repository at this point in the history
This commit implements the numastat corelens module, which enables users
to dump detailed memory statistics for each NUMA node. The output is
similar to that of 'numastat -m', which shows meminfo-like, system-wide
memory usage.

This module supports UEK 4, 5, 6, and 7 on both x86-64 and aarch64, and
has been tested in each of these configurations by comparing its output
against that of `numastat -m`. The results match closely, with only
small differences.

Signed-off-by: Jianfeng Wang <[email protected]>
  • Loading branch information
jianfenw authored and brenns10 committed Dec 1, 2023
1 parent 7e369d5 commit 8ce8757
Show file tree
Hide file tree
Showing 3 changed files with 368 additions and 21 deletions.
44 changes: 23 additions & 21 deletions drgn_tools/meminfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,28 @@ def get_mm_constants(prog: Program) -> Dict[str, int]:
_hpage_pmd_order = _hpage_pmd_shift - mm_consts["PAGE_SHIFT"]
mm_consts["HPAGE_PMD_NR"] = 1 << _hpage_pmd_order

# The vm statistics items for transparent hugepages may be counted
# in hugepages or in pages (since latest kernels). This commit
# (ID: 69473e5de87389be6c0fa4a5d574a50c8f904fb3) changed the unit from
# hugepages to pages and updates ``memory_stats`` to reflect it.
try:
unit = 1
for item in prog["memory_stats"]:
if item.name.string_().decode("utf-8") == "anon_thp":
# If ``ratio`` exists and does not equal to PAGE_SIZE, the
# unit is in hugepages. After the above commit, ``ratio``
# was changed to 1 (page). Later, the ``ratio`` column was
# removed because all statistics have the same unit.
if (
has_member(item, "ratio")
and item.ratio.value_() != mm_consts["PAGE_SIZE"]
):
unit = mm_consts["HPAGE_PMD_NR"]
break
except KeyError:
unit = mm_consts["HPAGE_PMD_NR"]
mm_consts["TRANS_HPAGE_UNIT"] = unit

# Determine the max number of swap file types.
# In mm/swap_state.c, ``nr_swapper_spaces`` is defined as:
# "static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;"
Expand Down Expand Up @@ -514,27 +536,7 @@ def get_all_meminfo(prog: Program) -> Dict[str, int]:
].counter.value_() << (mm_consts["PAGE_SHIFT"] - 10)

# Collect transparent hugepage meminfo
# The vm statistics items for transparent hugepages may be counted
# in hugepages or in pages (since latest kernels). This commit
# (ID: 69473e5de87389be6c0fa4a5d574a50c8f904fb3) changed the unit from
# hugepages to pages and updates ``memory_stats`` to reflect the change.
try:
unit = 1
for item in prog["memory_stats"]:
if item.name.string_().decode("utf-8") == "anon_thp":
# If ``ratio`` exists and does not equal to PAGE_SIZE, the unit
# is in hugepages. After the above commit, ``ratio`` was
# changed to 1 (page). Later, the ``ratio`` column was removed
# because all statistics have the same unit.
if (
has_member(item, "ratio")
and item.ratio.value_() != mm_consts["PAGE_SIZE"]
):
unit = mm_consts["HPAGE_PMD_NR"]
break
except KeyError:
unit = get_mm_constants(prog)["HPAGE_PMD_NR"]

unit = mm_consts["TRANS_HPAGE_UNIT"]
if "NR_ANON_THPS" in global_stats:
stats["AnonHugePages"] = global_stats["NR_ANON_THPS"] * unit
stats["ShmemHugePages"] = global_stats["NR_SHMEM_THPS"] * unit
Expand Down
280 changes: 280 additions & 0 deletions drgn_tools/numastat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
# Copyright (c) 2023, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""
Helpers for dumping memory usage information for each NUMA node
"""
import argparse
from collections import UserDict
from typing import Dict

from drgn import Object
from drgn import Program

from drgn_tools.corelens import CorelensModule
from drgn_tools.meminfo import for_each_node_zone
from drgn_tools.meminfo import get_active_numa_nodes
from drgn_tools.meminfo import get_mm_constants
from drgn_tools.meminfo import StatDict
from drgn_tools.util import has_member


__all__ = ("show_all_nodes_meminfo", "get_per_node_meminfo")


def get_per_node_mm_stats(prog: Program, node: Object) -> UserDict:
    """
    Read memory statistics counters from ``node.vm_stat`` and each memory
    zone's ``zone.vm_stat`` for the target node, and return them all in a
    single dictionary.

    :param prog: drgn program
    :param node: ``struct pglist_data *`` of the target NUMA node
    :returns: A dictionary that contains all global statistics items
    """
    stats = StatDict()

    # Node-level counters: newer kernels keep a per-node ``vm_stat`` array
    # indexed by ``enum node_stat_item``.
    if has_member(node, "vm_stat"):
        node_counters = node.vm_stat.read_()
        # The last enumerator is the array length marker, not a counter.
        for name, idx in prog.type("enum node_stat_item").enumerators[:-1]:
            # Clamp to zero: per-CPU deltas can make a raw counter
            # transiently negative.
            stats[name] = max(0, node_counters[idx].counter.value_())

    # Zone-level counters, summed across every zone of this node. The enum
    # lookup is loop-invariant, so resolve it once up front.
    zone_stat_items = prog.type("enum zone_stat_item").enumerators[:-1]
    per_zone_dicts = []
    for zone in for_each_node_zone(prog, node):
        zone_counters = zone.vm_stat.read_()
        per_zone = StatDict()
        for name, idx in zone_stat_items:
            per_zone[name] = max(0, zone_counters[idx].counter.value_())

        # ``zone.managed_pages`` is an atomic counter on some kernel
        # versions and a plain integer on others — try both access paths.
        try:
            per_zone["NR_MANAGED_PAGES"] = zone.managed_pages.counter.value_()
        except AttributeError:
            per_zone["NR_MANAGED_PAGES"] = zone.managed_pages.value_()

        per_zone_dicts.append(per_zone)

    for per_zone in per_zone_dicts:
        for name, value in per_zone.items():
            if name in stats:
                stats[name] += value
            else:
                stats[name] = value
    return stats


def get_per_node_meminfo(prog: Program, node: Object) -> Dict[str, int]:
    """
    Collect detailed memory statistics for a NUMA node. Results are expected
    to be similar to outputs produced by node_read_meminfo(...)
    in drivers/base/node.c.

    :param prog: drgn program
    :param node: ``struct pglist_data *`` of the target NUMA node
    :returns: A dictionary that contains all potential memory statistics
        items. Items that are unavailable on the running kernel are set
        to -1 so callers can detect and skip them.
    """
    mm_stats = {}

    # Read memory statistics and constants.
    mm_consts = get_mm_constants(prog)
    node_zone_stats = get_per_node_mm_stats(prog, node)

    totalram_pages = node_zone_stats["NR_MANAGED_PAGES"]
    freeram_pages = node_zone_stats["NR_FREE_PAGES"]
    bounce_pages = node_zone_stats["NR_BOUNCE"]
    mlocked_pages = node_zone_stats["NR_MLOCK"]

    slab_reclaimable = node_zone_stats["NR_SLAB_RECLAIMABLE"]
    slab_unreclaimable = node_zone_stats["NR_SLAB_UNRECLAIMABLE"]
    lru_active_anon = node_zone_stats["NR_ACTIVE_ANON"]
    lru_inactive_anon = node_zone_stats["NR_INACTIVE_ANON"]
    lru_active_file = node_zone_stats["NR_ACTIVE_FILE"]
    lru_inactive_file = node_zone_stats["NR_INACTIVE_FILE"]
    lru_unevictable = node_zone_stats["NR_UNEVICTABLE"]

    mm_stats["MemTotal"] = totalram_pages
    mm_stats["MemFree"] = freeram_pages
    mm_stats["MemUsed"] = totalram_pages - freeram_pages

    mm_stats["Active"] = lru_active_anon + lru_active_file
    mm_stats["Inactive"] = lru_inactive_anon + lru_inactive_file
    mm_stats["Active(anon)"] = lru_active_anon
    mm_stats["Inactive(anon)"] = lru_inactive_anon
    mm_stats["Active(file)"] = lru_active_file
    mm_stats["Inactive(file)"] = lru_inactive_file
    mm_stats["Unevictable"] = lru_unevictable
    mm_stats["Mlocked"] = mlocked_pages

    # Collect swap meminfo. NR_SWAPCACHE does not exist on older kernels.
    try:
        mm_stats["SwapCached"] = node_zone_stats["NR_SWAPCACHE"]
    except LookupError:
        mm_stats["SwapCached"] = -1
    mm_stats["Dirty"] = node_zone_stats["NR_FILE_DIRTY"]
    mm_stats["Writeback"] = node_zone_stats["NR_WRITEBACK"]
    mm_stats["FilePages"] = node_zone_stats["NR_FILE_PAGES"]
    mm_stats["Mapped"] = node_zone_stats["NR_FILE_MAPPED"]
    mm_stats["AnonPages"] = node_zone_stats["NR_ANON_MAPPED"]
    mm_stats["Shmem"] = node_zone_stats["NR_SHMEM"]

    # Collect slab meminfo. NR_KERNEL_MISC_RECLAIMABLE does not exist on
    # older kernels.
    try:
        kernel_misc = node_zone_stats["NR_KERNEL_MISC_RECLAIMABLE"]
        mm_stats["KReclaimable"] = slab_reclaimable + kernel_misc
    except LookupError:
        mm_stats["KReclaimable"] = -1
    mm_stats["Slab"] = slab_reclaimable + slab_unreclaimable
    mm_stats["SReclaimable"] = slab_reclaimable
    mm_stats["SUnreclaim"] = slab_unreclaimable

    # Collect other kernel page usage. NR_KERNEL_STACK_KB is counted in
    # KiB, so convert it to pages to match the other counters.
    mm_stats["KernelStack"] = node_zone_stats["NR_KERNEL_STACK_KB"] >> (
        mm_consts["PAGE_SHIFT"] - 10
    )
    mm_stats["PageTables"] = node_zone_stats["NR_PAGETABLE"]
    # NR_UNSTABLE_NFS was removed from recent kernels; report 0 when it is
    # absent. (Fix: the presence check must look at the collected kernel
    # counters, not the output dictionary being built.)
    mm_stats["NFS_Unstable"] = 0
    if "NR_UNSTABLE_NFS" in node_zone_stats:
        mm_stats["NFS_Unstable"] = node_zone_stats["NR_UNSTABLE_NFS"]
    mm_stats["Bounce"] = bounce_pages
    mm_stats["WritebackTmp"] = node_zone_stats["NR_WRITEBACK_TEMP"]

    # Collect transparent hugepage meminfo. TRANS_HPAGE_UNIT converts THP
    # counters to pages regardless of whether the kernel counts them in
    # hugepages or in pages.
    unit = mm_consts["TRANS_HPAGE_UNIT"]
    if "NR_ANON_THPS" in node_zone_stats:
        mm_stats["AnonHugePages"] = node_zone_stats["NR_ANON_THPS"] * unit
        mm_stats["ShmemHugePages"] = node_zone_stats["NR_SHMEM_THPS"] * unit
        mm_stats["ShmemPmdMapped"] = (
            node_zone_stats["NR_SHMEM_PMDMAPPED"] * unit
        )
        try:
            mm_stats["FileHugePages"] = node_zone_stats["NR_FILE_THPS"] * unit
        except LookupError:
            mm_stats["FileHugePages"] = -1
        try:
            mm_stats["FilePmdMapped"] = node_zone_stats["NR_FILE_PMDMAPPED"]
        except LookupError:
            mm_stats["FilePmdMapped"] = -1
    else:
        # Older kernels only track anonymous THPs, under a different name.
        mm_stats["AnonHugePages"] = (
            node_zone_stats["NR_ANON_TRANSPARENT_HUGEPAGES"] * unit
        )
        mm_stats["ShmemHugePages"] = -1
        mm_stats["ShmemPmdMapped"] = -1
        mm_stats["FileHugePages"] = -1
        mm_stats["FilePmdMapped"] = -1

    # Collect hugepage info for the default hugepage size in this node.
    node_id = node.node_id.value_()
    hstate = prog["hstates"][prog["default_hstate_idx"]]
    mm_stats["HugePages_Total"] = hstate.nr_huge_pages_node[node_id].value_()
    mm_stats["HugePages_Free"] = hstate.free_huge_pages_node[node_id].value_()
    mm_stats["HugePages_Surp"] = hstate.surplus_huge_pages_node[
        node_id
    ].value_()
    return mm_stats


def show_all_nodes_meminfo(prog: Program) -> None:
    """
    Dump various details about the memory subsystem for each NUMA node.

    This function must parse machine info to determine arch-specific
    parameters before parsing per-node memory statistics.

    :param prog: drgn program
    """
    # All mm statistics items reported by numastat, in output order.
    node_meminfo_items = [
        "MemTotal",
        "MemFree",
        "MemUsed",
        "Active",
        "Inactive",
        "Active(anon)",
        "Inactive(anon)",
        "Active(file)",
        "Inactive(file)",
        "Unevictable",
        "Mlocked",
        "Dirty",
        "Writeback",
        "FilePages",
        "Mapped",
        "AnonPages",
        "Shmem",
        "KernelStack",
        "PageTables",
        "NFS_Unstable",
        "Bounce",
        "WritebackTmp",
        "KReclaimable",
        "Slab",
        "SReclaimable",
        "SUnreclaim",
        "AnonHugePages",
        "ShmemHugePages",
        "ShmemPmdMapped",
        "FileHugePages",
        "FilePmdMapped",
        "HugePages_Total",
        "HugePages_Free",
        "HugePages_Surp",
    ]

    # Collect mm statistics from all active NUMA nodes.
    active_nodes = get_active_numa_nodes(prog)
    num_active_nodes = len(active_nodes)
    per_node_meminfo = [
        get_per_node_meminfo(prog, node) for node in active_nodes
    ]

    # Output
    print("Per-node system memory usage (in MBs):")

    # Data rows print the item name with "{: <16}", so the header and the
    # separator must be padded by the same 16 columns to stay aligned.
    node_name_line = " " * 16
    for i in range(num_active_nodes):
        node_name = f"Node {i}"
        node_name_line += f"{node_name: >16}"
    node_name_line += f"{'Total': >16}"
    print(node_name_line)
    print(" " * 16 + " ---------------" * (num_active_nodes + 1))

    page_to_mb = prog.constant("PAGE_SIZE").value_() / (1024 * 1024)
    for mm_item in node_meminfo_items:
        # A negative value marks an item that is unavailable on this
        # kernel; skip the row if any node lacks it.
        if any(node_stats[mm_item] < 0 for node_stats in per_node_meminfo):
            continue

        curr_line = f"{mm_item: <16}"
        sum_node_stats_mb = 0.0
        for node_stats in per_node_meminfo:
            num_mb = node_stats[mm_item] * page_to_mb
            sum_node_stats_mb += num_mb
            curr_line += f"{num_mb: >16.2f}"

        # Add a column that sums mm items from all NUMA nodes.
        curr_line += f"{sum_node_stats_mb: >16.2f}"
        print(curr_line)


class NumastatModule(CorelensModule):
    """
    Show various details about the memory management subsystem for all
    active NUMA nodes in the system.
    """

    # Corelens module name; invoked as ``corelens numastat``.
    name = "numastat"

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        # This module takes no options of its own; simply print the
        # per-node, meminfo-like statistics table.
        show_all_nodes_meminfo(prog)
Loading

0 comments on commit 8ce8757

Please sign in to comment.