From 6528b96adb0c31a6fbce9904275f902234f571a9 Mon Sep 17 00:00:00 2001 From: Carsten Grohmann Date: Tue, 14 Sep 2021 20:18:37 +0200 Subject: [PATCH] Restructure regex for better error messages --- OOMAnalyser.py | 225 ++++++++++++++++++++++++------------------------- test.py | 19 ++--- 2 files changed, 117 insertions(+), 127 deletions(-) diff --git a/OOMAnalyser.py b/OOMAnalyser.py index cbec306..7362237 100644 --- a/OOMAnalyser.py +++ b/OOMAnalyser.py @@ -353,115 +353,115 @@ class OOMEntity(object): class OOMAnalyser(object): """Analyse an OOM object and calculate additional values""" - REC_INVOKED_OOMKILLER = re.compile( - r'^(?P[\S ]+) invoked oom-killer: ' - r'gfp_mask=(?P0x[a-z0-9]+)(\((?P[A-Z_|]+)\))?, ' - r'(nodemask=(?P([\d,-]+|\(null\))), )?' - r'order=(?P\d+), ' - r'oom_score_adj=(?P\d+)', - re.MULTILINE) + EXTRACT_PATTERN = { + 'invoked oom-killer': ( + r'^(?P[\S ]+) invoked oom-killer: ' + r'gfp_mask=(?P0x[a-z0-9]+)(\((?P[A-Z_|]+)\))?, ' + r'(nodemask=(?P([\d,-]+|\(null\))), )?' + r'order=(?P\d+), ' + r'oom_score_adj=(?P\d+)', + True, + ), + 'Trigger process and kernel version': ( + r'^CPU: \d+ PID: (?P\d+) ' + r'Comm: .* (Not tainted|Tainted:.*) ' + r'(?P\d[\w.-]+) #\d', + True, + ), - REC_PID_KERNELVERSION = re.compile( - r'^CPU: \d+ PID: (?P\d+) ' - r'Comm: .* (Not tainted|Tainted:.*) ' - r'(?P\d[\w.-]+) #\d', - re.MULTILINE - ) + # split caused by a limited number of iterations during converting PY regex into JS regex + 'Mem-Info (part 1)': ( + r'^Mem-Info:.*' + r'(?:\n)' - # split caused by a limited number of iterations during converting PY regex into JS regex - REC_MEMINFO_1 = re.compile( - # head line - r'^Mem-Info:.*' + # first line (starting w/o a space) + r'^active_anon:(?P\d+) inactive_anon:(?P\d+) ' + r'isolated_anon:(?P\d+)' + r'(?:\n)' - # first line break - r'(?:\n)' + # remaining lines (w/ leading space) + r'^ active_file:(?P\d+) inactive_file:(?P\d+) ' + r'isolated_file:(?P\d+)' + r'(?:\n)' - # first line (starting with a space) - r'^active_anon:(?P\d+) inactive_anon:(?P\d+) ' - r'isolated_anon:(?P\d+)' - - # next line break - r'(?:\n)' - - # remaining lines (with leading space) - r'^ active_file:(?P\d+) inactive_file:(?P\d+) ' - r'isolated_file:(?P\d+)' - - # next line break - r'(?:\n)' - - r'^ unevictable:(?P\d+) dirty:(?P\d+) writeback:(?P\d+) ' - r'unstable:(?P\d+)' - - # # next line break - # r'(?:\n)' - # - , re.MULTILINE - ) - - REC_MEMINFO_2 = re.compile( - r'^ slab_reclaimable:(?P\d+) slab_unreclaimable:(?P\d+)' - r'(?:\n)' - r'^ mapped:(?P\d+) shmem:(?P\d+) pagetables:(?P\d+) ' - r'bounce:(?P\d+)' - r'(?:\n)' - r'^ free:(?P\d+) free_pcp:(?P\d+) free_cma:(?P\d+)', - re.MULTILINE - ) - - REC_MEM_NODEINFO = re.compile(r'(^Node \d+ (DMA|Normal|hugepages).*(:?\n))+', re.MULTILINE) - - REC_PAGECACHE = re.compile(r'^(?P\d+) total pagecache pages.*$', re.MULTILINE) - - REC_SWAP = re.compile( - r'^(?P\d+) pages in swap cache' - r'(?:\n)' - r'^Swap cache stats: add \d+, delete \d+, find \d+\/\d+' - r'(?:\n)' - r'^Free swap = (?P\d+)kB' - r'(?:\n)' - r'^Total swap = (?P\d+)kB', - re.MULTILINE) - - REC_PAGEINFO = re.compile( - r'^(?P\d+) pages RAM' - r'(' - r'(?:\n)' - r'^(?P\d+) pages HighMem/MovableOnly' - r')?' - r'(?:\n)' - r'^(?P\d+) pages reserved' - r'(' - r'(?:\n)' - r'^(?P\d+) pages cma reserved' - r')?' - r'(' - r'(?:\n)' - r'^(?P\d+) pages in pagetable cache' - r')?' - r'(' - r'(?:\n)' - r'^(?P\d+) pages hwpoisoned' - r')?', - re.MULTILINE) + r'^ unevictable:(?P\d+) dirty:(?P\d+) writeback:(?P\d+) ' + r'unstable:(?P\d+)', + True, + ), + 'Mem-Info (part 2)': ( + r'^ slab_reclaimable:(?P\d+) slab_unreclaimable:(?P\d+)' + r'(?:\n)' + r'^ mapped:(?P\d+) shmem:(?P\d+) pagetables:(?P\d+) ' + r'bounce:(?P\d+)' + r'(?:\n)' + r'^ free:(?P\d+) free_pcp:(?P\d+) free_cma:(?P\d+)', + True, + ), + 'Memory node information': ( + r'(^Node \d+ (DMA|Normal|hugepages).*(:?\n))+', + False, + ), + 'Page cache': ( + r'^(?P\d+) total pagecache pages.*$', + True, + ), + 'Swap usage information': ( + r'^(?P\d+) pages in swap cache' + r'(?:\n)' + r'^Swap cache stats: add \d+, delete \d+, find \d+\/\d+' + r'(?:\n)' + r'^Free swap = (?P\d+)kB' + r'(?:\n)' + r'^Total swap = (?P\d+)kB', + True, + ), + 'Page information': ( + r'^(?P\d+) pages RAM' + r'(' + r'(?:\n)' + r'^(?P\d+) pages HighMem/MovableOnly' + r')?' + r'(?:\n)' + r'^(?P\d+) pages reserved' + r'(' + r'(?:\n)' + r'^(?P\d+) pages cma reserved' + r')?' + r'(' + r'(?:\n)' + r'^(?P\d+) pages in pagetable cache' + r')?' + r'(' + r'(?:\n)' + r'^(?P\d+) pages hwpoisoned' + r')?', + True, + ), + 'Process killed by OOM': ( + r'^Out of memory: Kill process (?P\d+) \((?P[\S ]+)\) ' + r'score (?P\d+) or sacrifice child', + True, + ), + 'Details of process killed by OOM': ( + r'^Killed process \d+ \(.*\)' + r'(, UID \d+,)?' + r' total-vm:(?P\d+)kB, anon-rss:(?P\d+)kB, ' + r'file-rss:(?P\d+)kB, shmem-rss:(?P\d+)kB.*', + True, + ), + } + """ + RE pattern to extract information from OOM. + + The first item is the RE pattern and the second is whether it is mandatory to find this pattern. + + :type: dict(tuple(str, bool)) + """ REC_PROCESS_LINE = re.compile( r'^\[(?P[ \d]+)\]\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+' r'(?P\d+)\s+(?P\d+)\s+(?P-?\d+)\s+(?P.+)\s*') - REC_OOM_KILL_PROCESS = re.compile( - r'^Out of memory: Kill process (?P\d+) \((?P[\S ]+)\) ' - r'score (?P\d+) or sacrifice child', - re.MULTILINE - ) - - REC_KILLED_PROCESS = re.compile( - r'^Killed process \d+ \(.*\)' - r'(, UID \d+,)?' - r' total-vm:(?P\d+)kB, anon-rss:(?P\d+)kB, ' - r'file-rss:(?P\d+)kB, shmem-rss:(?P\d+)kB.*', - re.MULTILINE) - lines = [] """All lines of an OOM without leading timestamps""" @@ -500,26 +500,17 @@ class OOMAnalyser(object): """Extract details from OOM message text""" self.results = {} - - for rec in [self.REC_INVOKED_OOMKILLER, - self.REC_KILLED_PROCESS, - self.REC_MEMINFO_1, - self.REC_MEMINFO_2, - self.REC_OOM_KILL_PROCESS, - self.REC_PAGECACHE, - self.REC_PAGEINFO, - self.REC_PID_KERNELVERSION, - self.REC_SWAP, - ]: + # __pragma__ ('jsiter') + for k in self.EXTRACT_PATTERN: + pattern, is_mandatory = self.EXTRACT_PATTERN[k] + rec = re.compile(pattern, re.MULTILINE) match = rec.search(self.oom_entity.text) if match: self.results.update(match.groupdict()) - else: - warning('No match for regex: "{}"'.format(rec.pattern)) - - match = self.REC_MEM_NODEINFO.search(self.oom_entity.text) - if match: - self.results['mem_node_info'] = match.group() + elif is_mandatory: + error('Failed to extract information from OOM text. The regular expression "{}" (pattern "{}") ' + 'does not find anything. This will cause subsequent errors.'.format(k, pattern)) + # __pragma__ ('nojsiter') self.results['hardware_info'] = self._extract_block_from_next_pos('Hardware name:') diff --git a/test.py b/test.py index be4f16c..920ccab 100755 --- a/test.py +++ b/test.py @@ -20,6 +20,7 @@ import http.server import logging import os +import re import socketserver import threading import unittest @@ -286,28 +287,26 @@ class TestPython(TestBase): def test_001_trigger_proc_space(self): """Test RE to find name of trigger process""" first = self.get_first_line(OOMAnalyser.OOMDisplay.example) - rec = OOMAnalyser.OOMAnalyser.REC_INVOKED_OOMKILLER + pattern = OOMAnalyser.OOMAnalyser.EXTRACT_PATTERN['invoked oom-killer'][0] + rec = re.compile(pattern, re.MULTILINE) match = rec.search(first) - self.assertTrue(match, 'Error: re.search(REC_INVOKED_OOMKILLER) failed for simple ' - 'process name') + self.assertTrue(match, "Error: re.search('invoked oom-killer') failed for simple process name") first = first.replace('sed', 'VM Monitoring Task') match = rec.search(first) - self.assertTrue(match, 'Error: re.search(REC_INVOKED_OOMKILLER) failed for process name ' - 'with space') + self.assertTrue(match, "Error: re.search('invoked oom-killer') failed for process name with space") def test_002_killed_proc_space(self): """Test RE to find name of killed process""" last = self.get_last_line(OOMAnalyser.OOMDisplay.example) - rec = OOMAnalyser.OOMAnalyser.REC_OOM_KILL_PROCESS + pattern = OOMAnalyser.OOMAnalyser.EXTRACT_PATTERN['Process killed by OOM'][0] + rec = re.compile(pattern, re.MULTILINE) match = rec.search(last) - self.assertTrue(match, 'Error: re.search(REC_OOM_KILL_PROCESS) failed for simple ' - 'process name') + self.assertTrue(match, "Error: re.search('Process killed by OOM') failed for simple process name") last = last.replace('sed', 'VM Monitoring Task') match = rec.search(last) - self.assertTrue(match, 'Error: re.search(REC_OOM_KILL_PROCESS) failed for process name ' - 'with space') + self.assertTrue(match, "Error: re.search('Process killed by OOM') failed for process name with space") def test_003_OOMEntity_number_of_columns_to_strip(self): """Test stripping useless / leading columns"""