Add check for heavy memory fragmentation

This commit is contained in:
Carsten Grohmann 2023-03-07 21:48:23 +01:00
parent ec8b4c2374
commit 91caad251e
3 changed files with 92 additions and 1 deletions

View File

@ -87,6 +87,15 @@ THIS PROGRAM COMES WITH NO WARRANTY
.js-killed-proc-score--show { .js-killed-proc-score--show {
/* empty - used to hide/show OOM score of killed process */ /* empty - used to hide/show OOM score of killed process */
} }
.js-memory-fragmentation--show {
/* empty - used to hide/show details for memory fragmentation */
}
.js-memory-heavy-fragmentation--show {
/* empty - used to hide/show details for memory fragmentation */
}
.js-memory-no-heavy-fragmentation--show {
/* empty - used to hide/show details for memory fragmentation */
}
.js-oom-automatic--show { .js-oom-automatic--show {
/* empty - used to show sections for automatically triggered OOMs */ /* empty - used to show sections for automatically triggered OOMs */
@ -442,6 +451,21 @@ window.onerror = function (msg, url, lineNo, columnNo, errorObj) {
This analysis result is an estimate because the kernel reduces the minimum watermark in some rare cases. This analysis result is an estimate because the kernel reduces the minimum watermark in some rare cases.
</p> </p>
</div> </div>
<div>
<p class="js-text--default-hide js-memory-fragmentation--show">
Memory fragmentation is common. It occurs due to dynamic memory allocation by the kernel as well as by
all applications.
<span class="js-text--default-hide js-memory-heavy-fragmentation--show">
The system memory is heavily fragmented, because all chunks with an order &ge;
<span class="kconfig.PAGE_ALLOC_COSTLY_ORDER"></span> are in use. Allocation of larger contiguous
memory areas will fail.
</span>
<span class="js-text--default-hide js-memory-no-heavy-fragmentation--show">
The system memory is not heavily fragmented, because chunks with an order &ge;
<span class="kconfig.PAGE_ALLOC_COSTLY_ORDER"></span> are freely available.
</span>
</p>
</div>
</div> </div>
<h3>Details of analysis</h3> <h3>Details of analysis</h3>
@ -992,6 +1016,7 @@ window.onerror = function (msg, url, lineNo, columnNo, errorObj) {
<li>Add GFP flags for more kernel releases</li> <li>Add GFP flags for more kernel releases</li>
<li>Display missing memory chunks (buddyinfo) again</li> <li>Display missing memory chunks (buddyinfo) again</li>
<li>Add analysis why the memory request failed</li> <li>Add analysis why the memory request failed</li>
<li>Add check for heavy memory fragmentation</li>
<li>...</li> <li>...</li>
</ol> </ol>

View File

@ -2730,6 +2730,13 @@ class OOMResult:
@see: OOMAnalyser._analyse_alloc_failure() @see: OOMAnalyser._analyse_alloc_failure()
""" """
mem_fragmented = None
"""True if the memory is heavily fragmented. This means that the higher order has no free chunks.
@see: BaseKernelConfig.PAGE_ALLOC_COSTLY_ORDER, OOMAnalyser._check_for_memory_fragmentation()
@type: None | bool
"""
oom_entity = None oom_entity = None
""" """
State of this OOM (unknown, incomplete, ...) State of this OOM (unknown, incomplete, ...)
@ -3271,6 +3278,27 @@ class OOMAnalyser:
return True return True
return False return False
def _check_for_memory_fragmentation(self):
"""Check for heavy memory fragmentation. This means that the higher order has no free chunks.
Returns True, all high order chunk are in use.
Returns False, if high order chunks are available.
Returns None, if buddyinfo doesn't contain information for the requested node, order or zone
@see: BaseKernelConfig.PAGE_ALLOC_COSTLY_ORDER, OOMResult.mem_fragmented
@rtype: None|bool
"""
zone = self.oom_result.details["trigger_proc_mem_zone"]
node = self._extract_node_from_watermarks(zone)
if zone not in self.oom_result.details["_buddyinfo"]:
return None
self.oom_result.mem_fragmented = not self._check_free_chunks(
self.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER, zone, node
)
self.oom_result.details[
"kconfig.PAGE_ALLOC_COSTLY_ORDER"
] = self.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER
def _analyse_alloc_failure(self): def _analyse_alloc_failure(self):
""" """
Analyse why the memory allocation could be failed. Analyse why the memory allocation could be failed.
@ -3506,6 +3534,7 @@ class OOMAnalyser:
self._calc_trigger_process_values() self._calc_trigger_process_values()
self._calc_killed_process_values() self._calc_killed_process_values()
self._analyse_alloc_failure() self._analyse_alloc_failure()
self._check_for_memory_fragmentation()
def analyse(self): def analyse(self):
""" """
@ -4373,6 +4402,7 @@ Out of memory: Killed process 651 (unattended-upgr) total-vm:108020kB, anon-rss:
self._show_swap_usage() self._show_swap_usage()
self._show_ram_usage() self._show_ram_usage()
self._show_alloc_failure() self._show_alloc_failure()
self._show_memory_fragmentation()
# generate process table # generate process table
self._show_pstable() self._show_pstable()
@ -4405,6 +4435,16 @@ Out of memory: Killed process 651 (unattended-upgr) total-vm:108020kB, anon-rss:
else: else:
hide_elements(".js-alloc-failure--show") hide_elements(".js-alloc-failure--show")
def _show_memory_fragmentation(self):
"""Show details about memory fragmentation"""
if self.oom_result.mem_fragmented is None:
return
show_elements(".js-memory-fragmentation--show")
if self.oom_result.mem_fragmented:
show_elements(".js-memory-heavy-fragmentation--show")
else:
show_elements(".js-memory-no-heavy-fragmentation--show")
def _show_ram_usage(self): def _show_ram_usage(self):
"""Generate RAM usage diagram""" """Generate RAM usage diagram"""
ram_title_attr = ( ram_title_attr = (

28
test.py
View File

@ -250,6 +250,10 @@ class TestInBrowser(TestBase):
in explanation.text, in explanation.text,
"Memory allocation failure analysis not found", "Memory allocation failure analysis not found",
) )
self.assertTrue(
"The system memory is not heavily fragmented" in explanation.text,
"Missing statement about not fragmented memory",
)
mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info") mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info")
self.assertEqual( self.assertEqual(
@ -322,7 +326,12 @@ class TestInBrowser(TestBase):
) )
self.assertTrue( self.assertTrue(
"The request failed because" not in explanation.text, "The request failed because" not in explanation.text,
"Memory allocation failure analysis found", "Unexpected memory allocation failure analysis found",
)
self.assertTrue(
"The system memory is heavily fragmented" not in explanation.text
and "The system memory is not heavily fragmented" not in explanation.text,
"Unexpected memory fragmentation statement found",
) )
mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info") mem_node_info = self.driver.find_element(By.CLASS_NAME, "mem_node_info")
@ -1026,6 +1035,23 @@ Hardware name: HP ProLiant DL385 G7, BIOS A18 12/08/2012
"Unexpected reason why the memory allocation has failed.", "Unexpected reason why the memory allocation has failed.",
) )
def test_012_fragmentation(self):
"""Test memory fragmentation"""
oom = OOMAnalyser.OOMEntity(OOMAnalyser.OOMDisplay.example_rhel7)
analyser = OOMAnalyser.OOMAnalyser(oom)
success = analyser.analyse()
self.assertTrue(success, "OOM analysis failed")
zone = analyser.oom_result.details["trigger_proc_mem_zone"]
node = analyser._extract_node_from_watermarks(zone)
mem_fragmented = not analyser._check_free_chunks(
analyser.oom_result.kconfig.PAGE_ALLOC_COSTLY_ORDER, zone, node
)
self.assertFalse(
mem_fragmented,
'Memory of Node %d, Zone "%s" is not fragmented, but reported as fragmented'
% (node, zone),
)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main(verbosity=2) unittest.main(verbosity=2)