Extract and store free memory chunks

This commit is contained in:
Carsten Grohmann 2022-11-27 11:09:46 +01:00
parent d23f2dc23c
commit f281642379
2 changed files with 114 additions and 0 deletions

View File

@ -508,6 +508,13 @@ class BaseKernelConfig:
rec_oom_end = re.compile(r"^Killed process \d+", re.MULTILINE)
"""RE to match the last line of an OOM block"""
zoneinfo_start = "Node 0 DMA: "
"""
Pattern to find the start of the memory chunk information (buddyinfo)
:type: str
"""
def __init__(self):
super().__init__()
@ -2680,6 +2687,11 @@ class OOMAnalyser:
)
"""RE to match the OOM line with kernel version"""
REC_FREE_MEMORY_CHUNKS = re.compile(
"Node (?P<node>\d+) (?P<zone>DMA|DMA32|Normal): (?P<zone_usage>.*) = \d+kB"
)
"""RE to extract free memory chunks in a zone"""
def __init__(self, oom):
self.oom_entity = oom
self.oom_result = OOMResult()
@ -2888,6 +2900,7 @@ class OOMAnalyser:
self._extract_pstable()
self._extract_gpf_mask()
self._extract_buddyinfo()
def _extract_pstable(self):
"""Extract process table"""
@ -2906,6 +2919,59 @@ class OOMAnalyser:
self.oom_result.details["_pstable"][pid] = {}
self.oom_result.details["_pstable"][pid].update(details)
def _extract_buddyinfo(self):
"""Extract information about free areas in all zones
The migration types "(UEM)" or similar are not evaluated. They are documented in
mm/page_alloc.c:show_migration_types().
This function fills:
* OOMResult.details["_buddyinfo"] with [<node>][<zone>][<order>] = <number of free chunks>
* OOMResult.details["_buddyinfo_pagesize_kb"] with the extracted page size
"""
self.oom_result.details["_buddyinfo"] = {}
self.oom_result.details["_buddyinfo_pagesize_kb"] = None
buddy_info = self.oom_result.details["_buddyinfo"]
self.oom_entity.find_text(self.oom_result.kconfig.zoneinfo_start)
# Currently omm_entity point to the first line of the buddyinfo.
# The iterator protocol uses the next() call. However, this will cause the
# current line to be skipped.
# Therefore, we reset the counter by one line.
self.oom_entity.back()
for line in self.oom_entity:
match = self.REC_FREE_MEMORY_CHUNKS.match(line)
if not match:
continue
node = int(match.group("node"))
zone = match.group("zone")
if zone not in buddy_info:
buddy_info[zone] = {}
zone_details = buddy_info[zone]
order = -1 # to start with 0 after the first increment in for loop
for element in match.group("zone_usage").split(" "):
if element.startswith("("): # skip migration types
continue
order += 1
if order not in zone_details:
zone_details[order] = {}
order_details = zone_details[order]
count = element.split("*")[0]
count.strip()
order_details[node] = int(count)
if "_total" not in order_details:
order_details["_total"] = 0
order_details["_total"] += order_details[node]
if not self.oom_result.details["_buddyinfo_pagesize_kb"] and order == 0:
size = element.split("*")[1]
size = size[:-2] # strip "kB"
self.oom_result.details["_buddyinfo_pagesize_kb"] = int(size)
def _gfp_hex2flags(self, hexvalue):
"""\
Convert the hexadecimal value into flags specified by definition
@ -3009,6 +3075,13 @@ class OOMAnalyser:
self.oom_result.details["trigger_proc_requested_memory_pages"]
* self.oom_result.details["page_size_kb"]
)
if "DMA32" in self.oom_result.details["trigger_proc_gfp_mask"]:
zone = "DMA32"
elif "DMA" in self.oom_result.details["trigger_proc_gfp_mask"]:
zone = "DMA"
else:
zone = "Normal"
self.oom_result.details["trigger_proc_mem_zone"] = zone
def _calc_killed_process_values(self):
"""Calculate all values related with the killed process"""

41
test.py
View File

@ -855,6 +855,47 @@ Hardware name: HP ProLiant DL385 G7, BIOS A18 12/08/2012
% (kversion, min_version),
)
def test_009_extract_zoneinfo(self):
"""Test extracting zone usage information"""
oom = OOMAnalyser.OOMEntity(OOMAnalyser.OOMDisplay.example_rhel7)
analyser = OOMAnalyser.OOMAnalyser(oom)
success = analyser.analyse()
self.assertTrue(success, "OOM analysis failed")
self.assertEqual(
analyser.oom_result.kconfig.release,
(3, 10, ".el7."),
"Wrong KernelConfig release",
)
buddyinfo = analyser.oom_result.details["_buddyinfo"]
for zone, order, node, except_count in [
("Normal", 6, 0, 0), # order 6 - page size 256kB
("Normal", 6, 1, 2), # order 6 - page size 256kB
("Normal", 6, "_total", 0 + 2), # order 6 - page size 256kB
("Normal", 0, 0, 1231), # order 0 - page size 4kB
("Normal", 0, 1, 2245), # order 0 - page size 4kB
("Normal", 0, "_total", 1231 + 2245), # order 0 - page size 4kB
("DMA", 5, 0, 1), # order 5 - page size 128kB
("DMA", 5, "_total", 1), # order 5 - page size 128kB
("DMA32", 4, 0, 157), # order 4 - page size 64k
("DMA32", 4, "_total", 157), # order 4 - page size 64k
]:
self.assertTrue(
zone in buddyinfo, "Missing details for zone %s in buddy info" % zone
)
zone_info = buddyinfo[zone]
self.assertTrue(
order in zone_info,
'Missing details for order "%d" in buddy info' % order,
)
order_info = zone_info[order]
count = order_info[node]
self.assertTrue(
count == except_count,
'Wrong chunk count for order %d in zone "%s" for node "%s" (got: %d, expect %d)'
% (order, zone, node, count, except_count),
)
if __name__ == "__main__":
unittest.main(verbosity=2)