%PDF- %PDF-
Direktori : /usr/share/apport/general-hooks/ |
Current File : //usr/share/apport/general-hooks/parse_segv.py |
#!/usr/bin/python3 # # Copyright 2009-2010 Canonical, Ltd. # Author: Kees Cook <kees@ubuntu.com> # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. See http://www.gnu.org/copyleft/gpl.html for # the full text of the license. """Examine the crash files saved by apport to attempt to determine the cause of a segfault. Currently very very simplistic, and only finds commonly understood situations for x86/x86_64.""" # TODO: Address following pylint complaints # pylint: disable=invalid-name,missing-class-docstring,missing-function-docstring import logging import re import sys class ParseSegv: def __init__(self, registers, disassembly, maps): self.regs = self.parse_regs(registers) self.sp = None for reg in ("rsp", "esp"): if reg in self.regs: self.sp = self.regs[reg] (self.line, self.pc, self.insn, self.src, self.dest) = self.parse_disassembly( disassembly ) self.stack_vma = None self.maps = self.parse_maps(maps) def find_vma(self, addr): for vma in self.maps: if vma["start"] <= addr < vma["end"]: return vma return None def parse_maps(self, maps_str): maps = [] for line in maps_str.splitlines(): items = line.strip().split() if len(items) < 4: raise ValueError(f"Cannot parse maps line: {line.strip()}") span, perms = items[0:2] if len(items) == 5: name = None else: name = items[5] start, end = [int(x, 16) for x in span.split("-")] if name == "[stack]": self.stack_vma = len(maps) maps.append({"start": start, "end": end, "perms": perms, "name": name}) logging.debug( "start: %s, end: %s, perms: %s, name: %s", start, end, perms, name ) return maps @staticmethod def parse_regs(reg_str): regs = {} for line in reg_str.splitlines(): reg, hexvalue = line.split()[0:2] regs[reg] = int(hexvalue, 16) logging.debug("%s:0x%08x", reg, regs[reg]) return regs def parse_disassembly(self, disassembly): # TODO: Split into smaller functions/methods # pylint: disable=too-many-branches if not self.regs: raise ValueError("Registers not loaded yet!?") lines = disassembly.splitlines() # Throw away possible 'Dump' gdb report line if len(lines) > 0 and lines[0].startswith("Dump"): lines.pop(0) if len(lines) < 1: raise ValueError("Failed to load empty disassembly") line = lines[0].strip() # Drop GDB 7.1's leading $pc mark if line.startswith("=>"): line = line[2:].strip() logging.debug(line) pc_str = line.split()[0] if pc_str.startswith("0x"): pc = int(pc_str.split(":")[0], 16) else: # Could not identify this instruction line raise ValueError( f'Could not parse PC "{pc_str}" from disassembly line: {line}' ) logging.debug("pc: 0x%08x", pc) full_insn_str = line.split(":", 1)[1].strip() # Handle invalid memory if "Cannot access memory at address" in full_insn_str or ( full_insn_str == "" and len(lines) == 1 ): return line, pc, None, None, None # Handle wrapped lines if full_insn_str == "" and lines[1].startswith(" "): line = f"{line} {lines[1].strip()}" full_insn_str = line.split(":", 1)[1].strip() insn_parts = full_insn_str.split() # Drop call target names "call 0xb7a805af <_Unwind_Find_FDE@plt+111>" if insn_parts[-1].endswith(">") and insn_parts[-1].startswith("<"): insn_parts.pop(-1) # Attempt to find arguments args_str = "" if len(insn_parts) > 1: args_str = insn_parts.pop(-1) # Assume remainder is the insn itself insn = " ".join(insn_parts) logging.debug("insn: %s", insn) args = [] src = None dest = None if args_str == "": # Could not find insn args args = None else: logging.debug('args: "%s"', args_str) for m in re.finditer(r"([^,\(]*(\(:?[^\)]+\))*)", args_str): if len(m.group(0)): args.append(m.group(0)) if len(args) > 0: src = args[0] logging.debug("src: %s", src) if len(args) > 1: dest = args[1] logging.debug("dest: %s", dest) # Set up possible implicit memory destinations (stack actions) if insn in {"push", "pop", "pushl", "popl", "call", "callq", "ret", "retq"}: for reg in ("rsp", "esp"): if reg in self.regs: dest = f"(%{reg})" break return line, pc, insn, src, dest def validate_vma(self, perm, addr, name): perm_name = { "x": ["executable", "executing"], "r": ["readable", "reading"], "w": ["writable", "writing"], } vma = self.find_vma(addr) if vma is None: alarmist = "unknown" if addr < 65536: alarmist = "NULL" return ( False, f"{name} (0x{addr:08x}) not located in a known VMA region" f" (needed {perm_name[perm][0]} region)!", f"{perm_name[perm][1]} {alarmist} VMA", ) if perm not in vma["perms"]: alarmist = "" if perm == "x": if "w" in vma["perms"]: alarmist = "writable " else: alarmist = "non-writable " short = f"{perm_name[perm][1]} {alarmist}VMA {vma['name']}" return ( False, f"{name} (0x{addr:08x}) in non-{perm_name[perm][0]} VMA" f" region: 0x{vma['start']:08x}-0x{vma['end']:08x}" f" {vma['perms']} {vma['name']}", short, ) return (True, f"{name} (0x{addr:08x}) ok", f"{perm_name[perm][1]} ok") def register_value(self, reg): reg_orig = reg mask = 0 if reg.startswith("%"): reg = reg[1:] if reg in self.regs: return self.regs[reg] if len(reg) == 2 and reg.endswith("l"): mask |= 0xFF00 reg = f"{reg[0]}x" if reg in self.regs: return self.regs[reg] & ~mask if len(reg) == 2 and reg.endswith("x"): mask |= 0xFFFF0000 reg = f"e{reg}" if reg in self.regs: return self.regs[reg] & ~mask if len(reg) == 3 and reg.startswith("e"): mask |= 0xFFFFFFFF00000000 reg = f"r{reg[1:]}" if reg in self.regs: return self.regs[reg] & ~mask raise ValueError(f"Could not resolve register '{reg_orig}'") def calculate_arg(self, arg): # TODO: Split into smaller functions/methods # pylint: disable=too-many-branches # Check for and pre-remove segment offset segment = 0 if arg.startswith("%") and ":" in arg: parts = arg.split(":", 1) segment = self.regs[parts[0][1:]] arg = parts[1] # Handle standard offsets parts = arg.split("(") offset = parts[0] # Handle negative signs sign = 1 if offset.startswith("-"): sign = -1 offset = offset[1:] # Skip call target dereferences if offset.startswith("*"): offset = offset[1:] if len(offset) > 0: if offset.startswith("%"): # Handle the *%REG case add = self.regs[offset[1:]] else: if not offset.startswith("0x"): raise ValueError(f"Unknown offset literal: {parts[0]}") add = int(offset[2:], 16) * sign else: add = 0 def _reg_val(self, text, val=0): if text.startswith("%"): val = self.regs[text[1:]] elif text == "": val = 0 else: val = int(text) return val # (%ebx, %ecx, 4) style value = 0 if len(parts) > 1: parens = parts[1][0:-1] reg_list = parens.split(",") base = 0 if len(reg_list) > 0: base = _reg_val(self, reg_list[0], base) index = 0 if len(reg_list) > 1: index = _reg_val(self, reg_list[1], index) scale = 1 if len(reg_list) > 2: scale = _reg_val(self, reg_list[2], scale) value = base + index * scale value = segment + value + add if "esp" in self.regs: # 32bit return value % 0x100000000 # 64bit return value % 0x10000000000000000 def report(self): # TODO: Split into smaller functions/methods # pylint: disable=too-many-branches,too-many-statements understood = False reason = [] details = [f"Segfault happened at: {self.line}"] # Verify PC is in an executable region valid, out, short = self.validate_vma("x", self.pc, "PC") details.append(out) if not valid: reason.append(short) understood = True if self.insn in {"lea", "leal"}: # Short-circuit for instructions that do not cause vma access details.append(f"insn ({self.insn}) does not access VMA") else: # Verify source is readable if self.src: if ( ":" not in self.src and (self.src[0] in {"%", "$", "*"}) and not self.src.startswith("*%") ): details.append(f'source "{self.src}" ok') else: addr = self.calculate_arg(self.src) valid, out, short = self.validate_vma( "r", addr, f'source "{self.src}"' ) details.append(out) if not valid: reason.append(short) understood = True # Verify destination is writable if self.dest: if ":" not in self.dest and (self.dest[0] in {"%", "$", "*"}): details.append(f'destination "{self.dest}" ok') else: addr = self.calculate_arg(self.dest) valid, out, short = self.validate_vma( "w", addr, f'destination "{self.dest}"' ) details.append(out) if not valid: reason.append(short) understood = True # Handle I/O port operations if self.insn in {"out", "in"} and not understood: msg = ( f"disallowed I/O port operation" f" on port {self.register_value(self.src)}" ) reason.append(msg) details.append(msg) understood = True # Note position of SP with regard to "[stack]" VMA if self.sp is not None: if self.stack_vma is not None: if self.sp < self.maps[self.stack_vma]["start"]: details.append("Stack memory exhausted (SP below stack segment)") if self.sp >= self.maps[self.stack_vma]["end"]: details.append("Stack pointer not within stack segment") if not understood: valid, out, short = self.validate_vma("r", self.sp, "SP") details.append(out) if not valid: reason.append(short) understood = True if not understood: vma = self.find_vma(self.pc) msg = "Reason could not be automatically determined." if vma and (vma["name"] == "[vdso]" or vma["name"] == "[vsyscall]"): msg += " (Unhandled exception in kernel code?)" reason.append(msg) details.append(msg) return understood, "\n".join(reason), "\n".join(details) def add_info(report): # Only interested in segmentation faults... if report.get("Signal", "0") != "11": return needed = ["Signal", "Architecture", "Disassembly", "ProcMaps", "Registers"] for field in needed: if field not in report: report["SegvAnalysis"] = f'Skipped: missing required field "{field}"' return # Only run on segv for x86 and x86_64... if not report["Architecture"] in {"i386", "amd64"}: return try: segv = ParseSegv(report["Registers"], report["Disassembly"], report["ProcMaps"]) understood, reason, details = segv.report() if understood: report["SegvReason"] = reason report["SegvAnalysis"] = details except Exception as error: # pylint: disable=broad-except report["SegvAnalysis"] = f"Failure: {str(error)}" # pylint: disable-next=missing-function-docstring def main(): if len(sys.argv) != 4 or sys.argv[1] in {"-h", "--help"}: print("To run self-test, run without any arguments (or with -v)") print("To do stand-alone crash parsing:") print(f" Usage: {sys.argv[0]} Registers.txt Disassembly.txt ProcMaps.txt") sys.exit(0) with open(sys.argv[1], encoding="utf-8") as registers_file: registers = registers_file.read() with open(sys.argv[2], encoding="utf-8") as disassembly_file: disassembly = disassembly_file.read() with open(sys.argv[3], encoding="utf-8") as maps_file: maps = maps_file.read() segv = ParseSegv(registers, disassembly, maps) understood, reason, details = segv.report() print(f"{reason}\n\n{details}") rc = 0 if not understood: rc = 1 sys.exit(rc) if __name__ == "__main__": main()