Fix interpretation of the last OOM line

The memory statistics in the last OOM line (total-vm, anon-rss,
file-rss, shmem-rss) are system-wide totals and do not describe the killed process.
This commit is contained in:
Carsten Grohmann 2020-01-05 01:00:02 +01:00
parent 85308f6d03
commit 374cc2e910
2 changed files with 110 additions and 62 deletions

View File

@ -203,16 +203,13 @@ function goBack() {
The system couldn't satisfy this request and started the OOM killer to free memory. The OOM killer The system couldn't satisfy this request and started the OOM killer to free memory. The OOM killer
calculates a score for each process and terminates the process with the highest score. calculates a score for each process and terminates the process with the highest score.
The it terminates &quot;<span id="explain_killed_proc_name"></span>&quot; (pid It terminates &quot;<span id="explain_killed_proc_name"></span>&quot; (pid
<span id="explain_killed_proc_pid"></span>) to satisfy the initial memory request. <span id="explain_killed_proc_pid"></span>) to satisfy the initial memory request.
</p> </p>
<p> <p>
The terminated process uses <span class="text--append-suffix-kbytes" id="explain_killed_proc_rss_kb"></span> The terminated process uses <span class="text--append-suffix-kbytes" id="explain_killed_proc_rss_kb"></span>
(<span class="text--append-suffix-percent" id="explain_killed_proc_rss_percent"></span>) resident memory. (<span class="text--append-suffix-percent" id="explain_killed_proc_rss_percent"></span>) resident memory
These values are the sum of resident anonymous memory (anon-rss) and resident file mapping memory and has an OOM score of <span id="explain_killed_proc_score"></span>.
(file-rss). Resident shared memory
(<span class="text--append-suffix-kbytes" id="explain_killed_proc_shmem_rss_kb"></span>) is ignored in
this calculation.
<p> <p>
The system has <span class="text--append-suffix-kbytes" id="explain_ram_kb"></span> The system has <span class="text--append-suffix-kbytes" id="explain_ram_kb"></span>
(<span class="text--append-suffix-pages" id="explain_ram_pages"></span> * (<span class="text--append-suffix-pages" id="explain_ram_pages"></span> *
@ -221,6 +218,9 @@ function goBack() {
That's <span class="text--append-suffix-kbytes" id="explain_total_memory_kb"></span> total. That's <span class="text--append-suffix-kbytes" id="explain_total_memory_kb"></span> total.
</p> </p>
<!-- <!--
r'^Killed process \d+ \(.*\) total-vm:(?P<system_total_vm_kb>\d+)kB, anon-rss:(?P<system_anon_rss_kb>\d+)kB, '
r'file-rss:(?P<system_file_rss_kb>\d+)kB, shmem-rss:(?P<system_shmem_rss_kb>\d+)kB.*'
<p> <p>
<span class="text--append-suffix-kbytes" id="explain_used_memory_kb"></span> <span class="text--append-suffix-kbytes" id="explain_used_memory_kb"></span>
(<span class="text--append-suffix-percent" id="explain_used_memory_percent"></span>) of the physical memory (<span class="text--append-suffix-percent" id="explain_used_memory_percent"></span>) of the physical memory
@ -302,27 +302,15 @@ function goBack() {
<td>Programs with the highest OOM score are terminated first.</td> <td>Programs with the highest OOM score are terminated first.</td>
</tr> </tr>
<tr> <tr>
<td>Virtual Memory <br> (total-vm) </td> <td>Virtual Memory <br> (total_vm) </td>
<td id="killed_proc_vm_kb" class="text--append-suffix-kbytes"></td> <td id="killed_proc_vm_kb" class="text--append-suffix-kbytes"></td>
<td>Virtual memory used by this process.</td> <td>Virtual memory used by this process.</td>
</tr> </tr>
<tr> <tr>
<td>Resident anonymous memory <br> (anon-rss) </td> <td>Resident anonymous memory <br> (rss) </td>
<td id="killed_proc_anon_rss_kb" class="text--append-suffix-kbytes"></td> <td class="text--append-suffix-kbytes" id="killed_proc_rss_kb"></td>
<td>Part of the virtual process memory mapped into RAM.</td> <td>Part of the virtual process memory mapped into RAM.</td>
</tr> </tr>
<tr>
<td>Resident file mapping memory <br> (file-rss) </td>
<td id="killed_proc_file_rss_kb" class="text--append-suffix-kbytes"></td>
<td>Files which have been mapped into RAM (with
<a href="http://man7.org/linux/man-pages/man2/mmap.2.html">mmap(2).</a>)
</td>
</tr>
<tr>
<td>Resident shared memory <br> (shmem-rss) </td>
<td id="killed_proc_shmem_rss_kb" class="text--append-suffix-kbytes"></td>
<td>This may include System V shared memory and shared anonymous memory.</td>
</tr>
<!-- Memory Usage Graphs --> <!-- Memory Usage Graphs -->
@ -700,6 +688,7 @@ function goBack() {
<h4>General</h4> <h4>General</h4>
<ol> <ol>
<li>Add a textual summary of the analysis</li> <li>Add a textual summary of the analysis</li>
<li>Fix interpretation of the last OOM line</li>
<li>...</li> <li>...</li>
</ol> </ol>

View File

@ -337,17 +337,24 @@ class OOMAnalyser(object):
r')?', r')?',
re.MULTILINE) re.MULTILINE)
REC_PROCESSES = re.compile( REC_PROCESS_TABLE = re.compile(
r'^\[ pid \].*(?:\n)' r'^\[ pid \].*(?:\n)'
r'(^(\[[ \d]+.+)(?:\n))+', r'(^(\[[ \d]+.+)(?:\n))+',
re.MULTILINE) re.MULTILINE)
REC_KILLED = re.compile( REC_PROCESS_LINE = re.compile(
r'^\[(?P<pid>[ \d]+)\]\s+(?P<uid>\d+)\s+(?P<tgid>\d+)\s+(?P<total_vm>\d+)\s+(?P<rss>\d+)\s+(?P<nr_ptes>\d+)\s+'
r'(?P<swapents>\d+)\s+(?P<oom_score_adj>-?\d+)\s+(?P<name>.+)\s*')
REC_OOM_KILL_PROCESS = re.compile(
r'^Out of memory: Kill process (?P<killed_proc_pid>\d+) \((?P<killed_proc_name>[\w ]+)\) ' r'^Out of memory: Kill process (?P<killed_proc_pid>\d+) \((?P<killed_proc_name>[\w ]+)\) '
r'score (?P<killed_proc_score>\d+) or sacrifice child' r'score (?P<killed_proc_score>\d+) or sacrifice child',
r'(?:\n)' re.MULTILINE
r'Killed process \d+ \(.*\) total-vm:(?P<killed_proc_vm_kb>\d+)kB, anon-rss:(?P<killed_proc_anon_rss_kb>\d+)kB, ' )
r'file-rss:(?P<killed_proc_file_rss_kb>\d+)kB, shmem-rss:(?P<killed_proc_shmem_rss_kb>\d+)kB',
REC_KILLED_PROCESS = re.compile(
r'^Killed process \d+ \(.*\) total-vm:(?P<system_total_vm_kb>\d+)kB, anon-rss:(?P<system_anon_rss_kb>\d+)kB, '
r'file-rss:(?P<system_file_rss_kb>\d+)kB, shmem-rss:(?P<system_shmem_rss_kb>\d+)kB.*',
re.MULTILINE) re.MULTILINE)
lines = [] lines = []
@ -443,9 +450,10 @@ class OOMAnalyser(object):
self.results = {} self.results = {}
for rec in [self.REC_INVOKED_OOMKILLER, for rec in [self.REC_INVOKED_OOMKILLER,
self.REC_KILLED, self.REC_KILLED_PROCESS,
self.REC_MEMINFO_1, self.REC_MEMINFO_1,
self.REC_MEMINFO_2, self.REC_MEMINFO_2,
self.REC_OOM_KILL_PROCESS,
self.REC_PAGECACHE, self.REC_PAGECACHE,
self.REC_PAGEINFO, self.REC_PAGEINFO,
self.REC_PID_KERNELVERSION, self.REC_PID_KERNELVERSION,
@ -453,10 +461,11 @@ class OOMAnalyser(object):
]: ]:
match = rec.search(self.oom_entity.text) match = rec.search(self.oom_entity.text)
if match: if match:
gd = match.groupdict()
self.results.update(match.groupdict()) self.results.update(match.groupdict())
for groupname, rec in [('mem_node_info', self.REC_MEM_NODEINFO), for groupname, rec in [('mem_node_info', self.REC_MEM_NODEINFO),
('process_table', self.REC_PROCESSES), ('process_table', self.REC_PROCESS_TABLE),
]: ]:
match = rec.search(self.oom_entity.text) match = rec.search(self.oom_entity.text)
if match: if match:
@ -473,6 +482,21 @@ class OOMAnalyser(object):
call_trace += "{}\n".format(line.strip()) call_trace += "{}\n".format(line.strip())
self.results['call_trace'] = call_trace self.results['call_trace'] = call_trace
# extract process table
self.results['_processes'] = {}
self.oom_entity.find_text('[ pid ]')
for line in self.oom_entity:
if not line.startswith('['):
break
if line.startswith('[ pid ]'):
continue
match = self.REC_PROCESS_LINE.match(line)
if match:
details = match.groupdict()
pid = details.pop('pid')
self.results['_processes'][pid] = {}
self.results['_processes'][pid].update(details)
def _hex2flags(self, hexvalue, flag_definition): def _hex2flags(self, hexvalue, flag_definition):
"""\ """\
Convert the hexadecimal value into flags specified by definition Convert the hexadecimal value into flags specified by definition
@ -544,44 +568,43 @@ class OOMAnalyser(object):
return lvalue return lvalue
def _calc_from_oom_details(self): def _convert_numeric_results_to_integer(self):
""" """Convert all *_pages and *_kb to integer"""
Calculate values from already extracted details
@see: self.results
"""
# convert all *_pages and *_kb to integer
# __pragma__ ('jsiter') # __pragma__ ('jsiter')
for item in self.results: for item in self.results:
if self.results[item] is None: if self.results[item] is None:
self.results[item] = '<not found>' self.results[item] = '<not found>'
continue continue
if item.endswith('_kb') or item.endswith('_pages'): if item.endswith('_kb') or item.endswith('_pages') or item.endswith('_pid') or \
item == 'trigger_proc_order':
try: try:
self.results[item] = int(self.results[item]) self.results[item] = int(self.results[item])
except: except:
error('Converting item {}: {} to integer failed'.format(item, self.results[item])) error('Converting item "{}={}" to integer failed'.format(item, self.results[item]))
# __pragma__ ('nojsiter') # __pragma__ ('nojsiter')
kernel_version = self.results.get('kernel_version', '') def _convert_numeric_process_values_to_integer(self):
if 'x86_64' in kernel_version: """Convert numeric values in process table to integer values"""
self.results['platform'] = 'x86 64bit' ps = self.results['_processes']
else: # TODO Check if transcrypt issue: pragma jsiter for the whole block "for pid_str in ps: ..."
self.results['platform'] = 'unknown' # sets item in "for item in ['uid',..." to 0 instead of 'uid'
# jsiter is necessary to iterate over ps
for pid_str in ps.keys():
converted = {}
process = ps[pid_str]
for item in ['uid', 'tgid', 'total_vm', 'rss', 'nr_ptes', 'swapents', 'oom_score_adj']:
try:
converted[item] = int(process[item])
except:
error('Converting process parameter "{}={}" to integer failed'.format(item, process[item]))
self.results['dist'] = self.guess_distribution(kernel_version) pid_int = int(pid_str)
del ps[pid_str]
# educated guess ps[pid_int] = converted
self.results['page_size'] = 4
self.results['swap_cache_kb'] = self.results['swap_cache_pages'] * self.results['page_size']
del self.results['swap_cache_pages']
# SwapUsed = SwapTotal - SwapFree - SwapCache
self.results['swap_used_kb'] = self.results['swap_total_kb'] - self.results['swap_free_kb'] - \
self.results['swap_cache_kb']
def _calc_trigger_process_values(self):
"""Calculate all values related to the trigger process"""
self.results['trigger_proc_requested_memory'] = 2 ** self.results['trigger_proc_order'] self.results['trigger_proc_requested_memory'] = 2 ** self.results['trigger_proc_order']
self.results['trigger_proc_requested_memory_kbytes'] = self.results['page_size'] self.results['trigger_proc_requested_memory_kbytes'] = self.results['page_size']
@ -600,8 +623,28 @@ class OOMAnalyser(object):
# already fully processed and no own element to display -> delete otherwise an error msg will be shown # already fully processed and no own element to display -> delete otherwise an error msg will be shown
del self.results['trigger_proc_gfp_flags'] del self.results['trigger_proc_gfp_flags']
def guess_distribution(self, kernel_version): def _calc_killed_process_values(self):
"""Guess distribution from kernel version""" """Calculate all values related to the killed process"""
self.results['killed_proc_rss_kb'] = self.results['_processes'][self.results['killed_proc_pid']]['rss']
self.results['killed_proc_vm_kb'] = self.results['_processes'][self.results['killed_proc_pid']]['total_vm']
def _calc_swap_values(self):
"""Calculate all swap related values"""
self.results['swap_cache_kb'] = self.results['swap_cache_pages'] * self.results['page_size']
del self.results['swap_cache_pages']
# SwapUsed = SwapTotal - SwapFree - SwapCache
self.results['swap_used_kb'] = self.results['swap_total_kb'] - self.results['swap_free_kb'] - \
self.results['swap_cache_kb']
def _determinate_platform_and_distribution(self):
"""Determine platform and distribution"""
kernel_version = self.results.get('kernel_version', '')
if 'x86_64' in kernel_version:
self.results['platform'] = 'x86 64bit'
else:
self.results['platform'] = 'unknown'
dist = 'unknown' dist = 'unknown'
if '.el7uek' in kernel_version: if '.el7uek' in kernel_version:
dist = 'Oracle Linux 7 (Unbreakable Enterprise Kernel)' dist = 'Oracle Linux 7 (Unbreakable Enterprise Kernel)'
@ -615,7 +658,23 @@ class OOMAnalyser(object):
dist = 'Arch Linux' dist = 'Arch Linux'
elif '_generic' in kernel_version: elif '_generic' in kernel_version:
dist = 'Ubuntu' dist = 'Ubuntu'
return dist self.results['dist'] = dist
def _calc_from_oom_details(self):
"""
Calculate values from already extracted details
@see: self.results
"""
# educated guess
self.results['page_size'] = 4
self._convert_numeric_results_to_integer()
self._convert_numeric_process_values_to_integer()
self._calc_trigger_process_values()
self._calc_killed_process_values()
self._calc_swap_values()
self._determinate_platform_and_distribution()
def analyse(self): def analyse(self):
"""Extract and calculate values from the given OOM object""" """Extract and calculate values from the given OOM object"""
@ -1062,8 +1121,8 @@ Killed process 6576 (java) total-vm:33914892kB, anon-rss:20629004kB, file-rss:0k
show_element('analysis') show_element('analysis')
# copy entries for explanation section # copy entries for explanation section
for i in ('killed_proc_name', 'killed_proc_pid', 'killed_proc_shmem_rss_kb', 'page_size', 'ram_pages', for i in ('killed_proc_name', 'killed_proc_pid', 'killed_proc_rss_kb', 'killed_proc_score', 'page_size',
'swap_total_kb', 'swap_used_kb', 'trigger_proc_name', 'trigger_proc_pid', 'ram_pages', 'swap_total_kb', 'swap_used_kb', 'trigger_proc_name', 'trigger_proc_pid',
'trigger_proc_requested_memory', 'trigger_proc_requested_memory_kbytes', 'trigger_proc_requested_memory', 'trigger_proc_requested_memory_kbytes',
): ):
self.oom_details['explain_'+i] = self.oom_details.get(i) self.oom_details['explain_'+i] = self.oom_details.get(i)
@ -1071,9 +1130,6 @@ Killed process 6576 (java) total-vm:33914892kB, anon-rss:20629004kB, file-rss:0k
# calculate remaining explanation values # calculate remaining explanation values
self.oom_details['explain_ram_kb'] = self.oom_details['ram_pages'] * self.oom_details['page_size'] self.oom_details['explain_ram_kb'] = self.oom_details['ram_pages'] * self.oom_details['page_size']
self.oom_details['explain_killed_proc_rss_kb'] = self.oom_details['killed_proc_anon_rss_kb'] + \
self.oom_details['killed_proc_file_rss_kb']
self.oom_details['explain_killed_proc_rss_percent'] = int(100 * self.oom_details['explain_killed_proc_rss_percent'] = int(100 *
self.oom_details['explain_killed_proc_rss_kb'] / self.oom_details['explain_killed_proc_rss_kb'] /
self.oom_details['explain_ram_kb']) self.oom_details['explain_ram_kb'])
@ -1088,6 +1144,9 @@ Killed process 6576 (java) total-vm:33914892kB, anon-rss:20629004kB, file-rss:0k
# self.oom_details['swap_total_kb']) # self.oom_details['swap_total_kb'])
for item in self.oom_details.keys(): for item in self.oom_details.keys():
# ignore internal items
if item.startswith('_'):
continue
self._set_single_item(item) self._set_single_item(item)
# generate swap usage diagram # generate swap usage diagram