From 91caad251e81c30849699b4a527ea400bbdba0e8 Mon Sep 17 00:00:00 2001
From: Carsten Grohmann
Date: Tue, 7 Mar 2023 21:48:23 +0100
Subject: [PATCH] Add check for heavy memory fragmentation
---
OOMAnalyser.html | 25 +++++++++++++++++++++++++
OOMAnalyser.py | 40 ++++++++++++++++++++++++++++++++++++++++
test.py | 28 +++++++++++++++++++++++++++-
3 files changed, 92 insertions(+), 1 deletion(-)
diff --git a/OOMAnalyser.html b/OOMAnalyser.html
index db48703..26e14fe 100644
--- a/OOMAnalyser.html
+++ b/OOMAnalyser.html
@@ -87,6 +87,15 @@ THIS PROGRAM COMES WITH NO WARRANTY
.js-killed-proc-score--show {
/* empty - used to hide/show OOM score of killed process */
}
+ .js-memory-fragmentation--show {
+ /* empty - used to hide/show details for memory fragmentation */
+ }
+ .js-memory-heavy-fragmentation--show {
+ /* empty - used to hide/show the statement that the memory is heavily fragmented */
+ }
+ .js-memory-no-heavy-fragmentation--show {
+ /* empty - used to hide/show the statement that the memory is not heavily fragmented */
+ }
.js-oom-automatic--show {
/* empty - used to show sections for automatically triggered OOMs */
@@ -442,6 +451,21 @@ window.onerror = function (msg, url, lineNo, columnNo, errorObj) {
This analysis result is an estimate because the kernel reduces the minimum watermark in some rare cases.
+
+
+ Memory fragmentation is common. It occurs due to dynamic memory allocation by the kernel as well as by
+ all applications.
+
+ The system memory is heavily fragmented, because all chunks with an order ≥
+ are in use. Allocation of larger contiguous
+ memory areas will fail.
+
+
+ The system memory is not heavily fragmented, because chunks with an order ≥
+ are freely available.
+
+
+
Details of analysis
@@ -992,6 +1016,7 @@ window.onerror = function (msg, url, lineNo, columnNo, errorObj) {
Add GFP flags for more kernel releases
Display missing memory chunks (buddyinfo) again
Add analysis why the memory request failed
+ Add check for heavy memory fragmentation
...
diff --git a/OOMAnalyser.py b/OOMAnalyser.py
index aee3b38..9fb3322 100644
--- a/OOMAnalyser.py
+++ b/OOMAnalyser.py
@@ -2730,6 +2730,13 @@ class OOMResult:
@see: OOMAnalyser._analyse_alloc_failure()
"""
+ mem_fragmented = None
+ """True if the memory is heavily fragmented. This means that the higher order has no free chunks.
+
+ @see: BaseKernelConfig.PAGE_ALLOC_COSTLY_ORDER, OOMAnalyser._check_for_memory_fragmentation()
+ @type: None | bool
+ """
+
oom_entity = None
"""
State of this OOM (unknown, incomplete, ...)
@@ -3271,6 +3278,27 @@ class OOMAnalyser:
return True
return False
+ def _check_for_memory_fragmentation(self):
+     """Check for heavy memory fragmentation and store the result in OOMResult.mem_fragmented.
+
+     Returns True, if all chunks of the higher orders are in use (heavy fragmentation).
+     Returns False, if high order chunks are available.
+     Returns None, if buddyinfo doesn't contain information for the requested node, order or zone
+
+     @see: BaseKernelConfig.PAGE_ALLOC_COSTLY_ORDER, OOMResult.mem_fragmented
+     @rtype: None|bool
+     """
+     zone = self.oom_result.details["trigger_proc_mem_zone"]
+     node = self._extract_node_from_watermarks(zone)
+     if zone not in self.oom_result.details["_buddyinfo"]:
+         return None
+     order = self.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER
+     free = self._check_free_chunks(order, zone, node)  # presumably None for unknown node/order - confirm
+     fragmented = None if free is None else not free  # keep "unknown" distinct from "fragmented"
+     self.oom_result.mem_fragmented = fragmented
+     self.oom_result.details["kconfig.PAGE_ALLOC_COSTLY_ORDER"] = order
+     return fragmented
+
def _analyse_alloc_failure(self):
"""
Analyse why the memory allocation could be failed.
@@ -3506,6 +3534,7 @@ class OOMAnalyser:
self._calc_trigger_process_values()
self._calc_killed_process_values()
self._analyse_alloc_failure()
+ self._check_for_memory_fragmentation()
def analyse(self):
"""
@@ -4373,6 +4402,7 @@ Out of memory: Killed process 651 (unattended-upgr) total-vm:108020kB, anon-rss:
self._show_swap_usage()
self._show_ram_usage()
self._show_alloc_failure()
+ self._show_memory_fragmentation()
# generate process table
self._show_pstable()
@@ -4405,6 +4435,16 @@ Out of memory: Killed process 651 (unattended-upgr) total-vm:108020kB, anon-rss:
else:
hide_elements(".js-alloc-failure--show")
+ def _show_memory_fragmentation(self):
+     """Display the memory fragmentation statement that matches the analysis result"""
+     fragmented = self.oom_result.mem_fragmented
+     if fragmented is None:
+         # Fragmentation state is unknown - nothing is shown
+         return
+     show_elements(".js-memory-fragmentation--show")
+     # Exactly one of the two detail statements is displayed
+     detail_selector = (
+         ".js-memory-heavy-fragmentation--show"
+         if fragmented
+         else ".js-memory-no-heavy-fragmentation--show"
+     )
+     show_elements(detail_selector)
+
def _show_ram_usage(self):
"""Generate RAM usage diagram"""
ram_title_attr = (
diff --git a/test.py b/test.py
index d9fd564..9d43cba 100755
--- a/test.py
+++ b/test.py
@@ -250,6 +250,10 @@ class TestInBrowser(TestBase):
in explanation.text,
"Memory allocation failure analysis not found",
)
+ self.assertTrue(
+ "The system memory is not heavily fragmented" in explanation.text,
+ "Missing statement about not fragmented memory",
+ )
mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info")
self.assertEqual(
@@ -322,7 +326,12 @@ class TestInBrowser(TestBase):
)
self.assertTrue(
"The request failed because" not in explanation.text,
- "Memory allocation failure analysis found",
+ "Unexpected memory allocation failure analysis found",
+ )
+ self.assertTrue(
+ "The system memory is heavily fragmented" not in explanation.text
+ and "The system memory is not heavily fragmented" not in explanation.text,
+ "Unexpected memory fragmentation statement found",
)
mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info")
@@ -1026,6 +1035,23 @@ Hardware name: HP ProLiant DL385 G7, BIOS A18 12/08/2012
"Unexpected reason why the memory allocation has failed.",
)
+ def test_012_fragmentation(self):
+     """Test memory fragmentation
+
+     Analyses the RHEL7 example and checks that neither the chunk check nor the
+     stored analysis result reports heavy memory fragmentation.
+     """
+     oom = OOMAnalyser.OOMEntity(OOMAnalyser.OOMDisplay.example_rhel7)
+     analyser = OOMAnalyser.OOMAnalyser(oom)
+     success = analyser.analyse()
+     self.assertTrue(success, "OOM analysis failed")
+     zone = analyser.oom_result.details["trigger_proc_mem_zone"]
+     node = analyser._extract_node_from_watermarks(zone)
+     mem_fragmented = not analyser._check_free_chunks(
+         analyser.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER, zone, node
+     )
+     self.assertFalse(
+         mem_fragmented,
+         'Memory of Node %d, Zone "%s" is not fragmented, but reported as fragmented'
+         % (node, zone),
+     )
+     # The value stored by the analysis must not flag the example as fragmented either
+     self.assertFalse(
+         analyser.oom_result.mem_fragmented,
+         "Analysis result reports heavy memory fragmentation for a not fragmented example",
+     )
+
if __name__ == "__main__":
unittest.main(verbosity=2)