From 91caad251e81c30849699b4a527ea400bbdba0e8 Mon Sep 17 00:00:00 2001 From: Carsten Grohmann Date: Tue, 7 Mar 2023 21:48:23 +0100 Subject: [PATCH] Add check for heavy memory fragmentation --- OOMAnalyser.html | 25 +++++++++++++++++++++++++ OOMAnalyser.py | 40 ++++++++++++++++++++++++++++++++++++++++ test.py | 28 +++++++++++++++++++++++++++- 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/OOMAnalyser.html b/OOMAnalyser.html index db48703..26e14fe 100644 --- a/OOMAnalyser.html +++ b/OOMAnalyser.html @@ -87,6 +87,15 @@ THIS PROGRAM COMES WITH NO WARRANTY .js-killed-proc-score--show { /* empty - used to hide/show OOM score of killed process */ } + .js-memory-fragmentation--show { + /* empty - used to hide/show details for memory fragmentation */ + } + .js-memory-heavy-fragmentation--show { + /* empty - used to hide/show details for memory fragmentation */ + } + .js-memory-no-heavy-fragmentation--show { + /* empty - used to hide/show details for memory fragmentation */ + } .js-oom-automatic--show { /* empty - used to show sections for automatically triggered OOMs */ @@ -442,6 +451,21 @@ window.onerror = function (msg, url, lineNo, columnNo, errorObj) { This analysis result is an estimate because the kernel reduces the minimum watermark in some rare cases.

+
+

+ Memory fragmentation is common. It occurs due to dynamic memory allocation by the kernel as well as by + all applications. + + The system memory is heavily fragmented, because all chunks with an order ≥ + PAGE_ALLOC_COSTLY_ORDER are in use. Allocation of larger contiguous + memory areas will fail. + + + The system memory is not heavily fragmented, because chunks with an order ≥ + PAGE_ALLOC_COSTLY_ORDER are freely available. + +

+

Details of analysis

@@ -992,6 +1016,7 @@ window.onerror = function (msg, url, lineNo, columnNo, errorObj) {
  • Add GFP flags for more kernel releases
  • Display missing memory chunks (buddyinfo) again
  • Add analysis why the memory request failed
  • +
  • Add check for heavy memory fragmentation
  • ...
  • diff --git a/OOMAnalyser.py b/OOMAnalyser.py index aee3b38..9fb3322 100644 --- a/OOMAnalyser.py +++ b/OOMAnalyser.py @@ -2730,6 +2730,13 @@ class OOMResult: @see: OOMAnalyser._analyse_alloc_failure() """ + mem_fragmented = None + """True if the memory is heavily fragmented. This means that the higher order has no free chunks. + + @see: BaseKernelConfig.PAGE_ALLOC_COSTLY_ORDER, OOMAnalyser._check_for_memory_fragmentation() + @type: None | bool + """ + oom_entity = None """ State of this OOM (unknown, incomplete, ...) @@ -3271,6 +3278,27 @@ class OOMAnalyser: return True return False + def _check_for_memory_fragmentation(self): + """Check for heavy memory fragmentation. This means that the higher order has no free chunks. + + Returns True, if all high order chunks are in use. + Returns False, if high order chunks are available. + Returns None, if buddyinfo doesn't contain information for the requested node, order or zone. + + @see: BaseKernelConfig.PAGE_ALLOC_COSTLY_ORDER, OOMResult.mem_fragmented + @rtype: None|bool + """ + zone = self.oom_result.details["trigger_proc_mem_zone"] + node = self._extract_node_from_watermarks(zone) + if zone not in self.oom_result.details["_buddyinfo"]: + return None + self.oom_result.mem_fragmented = not self._check_free_chunks( + self.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER, zone, node + ) + self.oom_result.details[ + "kconfig.PAGE_ALLOC_COSTLY_ORDER" + ] = self.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER + def _analyse_alloc_failure(self): """ Analyse why the memory allocation could be failed. 
@@ -3506,6 +3534,7 @@ class OOMAnalyser: self._calc_trigger_process_values() self._calc_killed_process_values() self._analyse_alloc_failure() + self._check_for_memory_fragmentation() def analyse(self): """ @@ -4373,6 +4402,7 @@ Out of memory: Killed process 651 (unattended-upgr) total-vm:108020kB, anon-rss: self._show_swap_usage() self._show_ram_usage() self._show_alloc_failure() + self._show_memory_fragmentation() # generate process table self._show_pstable() @@ -4405,6 +4435,16 @@ Out of memory: Killed process 651 (unattended-upgr) total-vm:108020kB, anon-rss: else: hide_elements(".js-alloc-failure--show") + def _show_memory_fragmentation(self): + """Show details about memory fragmentation""" + if self.oom_result.mem_fragmented is None: + return + show_elements(".js-memory-fragmentation--show") + if self.oom_result.mem_fragmented: + show_elements(".js-memory-heavy-fragmentation--show") + else: + show_elements(".js-memory-no-heavy-fragmentation--show") + def _show_ram_usage(self): """Generate RAM usage diagram""" ram_title_attr = ( diff --git a/test.py b/test.py index d9fd564..9d43cba 100755 --- a/test.py +++ b/test.py @@ -250,6 +250,10 @@ class TestInBrowser(TestBase): in explanation.text, "Memory allocation failure analysis not found", ) + self.assertTrue( + "The system memory is not heavily fragmented" in explanation.text, + "Missing statement about not fragmented memory", + ) mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info") self.assertEqual( @@ -322,7 +326,12 @@ class TestInBrowser(TestBase): ) self.assertTrue( "The request failed because" not in explanation.text, - "Memory allocation failure analysis found", + "Unexpected memory allocation failure analysis found", + ) + self.assertTrue( + "The system memory is heavily fragmented" not in explanation.text + and "The system memory is not heavily fragmented" not in explanation.text, + "Unexpected memory fragmentation statement found", ) mem_node_info = 
self.driver.find_element(By.CLASS_NAME, "mem_node_info") @@ -1026,6 +1035,23 @@ Hardware name: HP ProLiant DL385 G7, BIOS A18 12/08/2012 "Unexpected reason why the memory allocation has failed.", ) + def test_012_fragmentation(self): + """Test memory fragmentation""" + oom = OOMAnalyser.OOMEntity(OOMAnalyser.OOMDisplay.example_rhel7) + analyser = OOMAnalyser.OOMAnalyser(oom) + success = analyser.analyse() + self.assertTrue(success, "OOM analysis failed") + zone = analyser.oom_result.details["trigger_proc_mem_zone"] + node = analyser._extract_node_from_watermarks(zone) + mem_fragmented = not analyser._check_free_chunks( + analyser.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER, zone, node + ) + self.assertFalse( + mem_fragmented, + 'Memory of Node %d, Zone "%s" is not fragmented, but reported as fragmented' + % (node, zone), + ) + if __name__ == "__main__": unittest.main(verbosity=2)