blob: 721c97a51723c1fe4728b78d070fab48cfffafd5 [file] [log] [blame]
#-------------------------------------------------------------------------------
# elftools: dwarf/descriptions.py
#
# Textual descriptions of the various values and enums of DWARF
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import defaultdict
from .constants import *
from .dwarf_expr import GenericExprVisitor
from .die import DIE
from ..common.utils import preserve_stream_pos, dwarf_assert
from ..common.py3compat import bytes2str
from .callframe import instruction_name, CIE, FDE
def set_global_machine_arch(machine_arch):
    """ Record machine_arch as the module-wide machine architecture.

        The value is stored in the module-level _MACHINE_ARCH variable, which
        describe_reg_name falls back to when it is not given an architecture
        explicitly.
    """
    global _MACHINE_ARCH
    _MACHINE_ARCH = machine_arch
def describe_attr_value(attr, die, section_offset):
    """ Build the textual representation of the value of attribute attr,
        suitable for tools like readelf.

        attr:
            the attribute to describe; its form selects the value formatter
            and its name selects the extra-information formatter.
        die:
            the DIE this attribute was extracted from
        section_offset:
            offset in the stream of the section the DIE belongs to

        Returns the value description and the extra information, separated
        by a tab character.
    """
    # The value description is chosen by the attribute's form...
    value_text = str(
        _ATTR_DESCRIPTION_MAP[attr.form](attr, die, section_offset))

    # ...while the further information we can display for some attributes is
    # chosen by the attribute's name.
    extra_text = _EXTRA_INFO_DESCRIPTION_MAP[attr.name](
        attr, die, section_offset)

    return '\t'.join((value_text, extra_text))
def describe_CFI_instructions(entry):
    """ Given a CFI entry (CIE or FDE), return the textual description of its
        instructions.

        entry:
            a CIE or an FDE. For an FDE, the alignment factors are taken from
            its CIE and the running location starts at the FDE's
            'initial_location'.

        Returns a single string holding one newline-terminated line per
        instruction, in the order of entry.instructions. Instructions that
        are not recognized are described as '<??>'.
    """
    def _assert_FDE_instruction(instr):
        dwarf_assert(
            isinstance(entry, FDE),
            'Unexpected instruction "%s" for a CIE' % instr)

    def _full_reg_name(regnum):
        return 'r%s (%s)' % (regnum, describe_reg_name(regnum))

    if isinstance(entry, CIE):
        cie = entry
    else: # FDE
        cie = entry.cie
        pc = entry['initial_location']

    # Descriptions are collected in a list and joined once at the end instead
    # of growing a string with repeated concatenation.
    lines = []
    for instr in entry.instructions:
        name = instruction_name(instr.opcode)

        if name in ('DW_CFA_offset',
                    'DW_CFA_offset_extended', 'DW_CFA_offset_extended_sf',
                    'DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
            lines.append(' %s: %s at cfa%+d\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor']))
        elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended',
                      'DW_CFA_undefined', 'DW_CFA_same_value',
                      'DW_CFA_def_cfa_register'):
            lines.append(' %s: %s\n' % (name, _full_reg_name(instr.args[0])))
        elif name == 'DW_CFA_register':
            # Terminated by a newline like every other instruction, so that
            # the following instruction starts on a line of its own.
            lines.append(' %s: %s in %s\n' % (
                name, _full_reg_name(instr.args[0]),
                _full_reg_name(instr.args[1])))
        elif name == 'DW_CFA_set_loc':
            # Replaces the running location with an explicit value.
            pc = instr.args[0]
            lines.append(' %s: %08x\n' % (name, pc))
        elif name in ('DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                      'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
            # Advancing needs the running location, which is only
            # initialized for FDEs.
            _assert_FDE_instruction(instr)
            factored_offset = instr.args[0] * cie['code_alignment_factor']
            lines.append(' %s: %s to %08x\n' % (
                name, factored_offset, factored_offset + pc))
            pc += factored_offset
        elif name in ('DW_CFA_remember_state', 'DW_CFA_restore_state',
                      'DW_CFA_nop'):
            lines.append(' %s\n' % name)
        elif name == 'DW_CFA_def_cfa':
            lines.append(' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]), instr.args[1]))
        elif name == 'DW_CFA_def_cfa_sf':
            lines.append(' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor']))
        elif name == 'DW_CFA_def_cfa_offset':
            lines.append(' %s: %s\n' % (name, instr.args[0]))
        elif name == 'DW_CFA_def_cfa_expression':
            expr_dumper = ExprDumper(entry.structs)
            expr_dumper.process_expr(instr.args[0])
            lines.append(' %s: (%s)\n' % (name, expr_dumper.get_str()))
        elif name == 'DW_CFA_expression':
            expr_dumper = ExprDumper(entry.structs)
            expr_dumper.process_expr(instr.args[1])
            lines.append(' %s: %s (%s)\n' % (
                name, _full_reg_name(instr.args[0]), expr_dumper.get_str()))
        else:
            lines.append(' %s: <??>\n' % name)

    return ''.join(lines)
def describe_CFI_register_rule(rule):
    """ Textual description of a CFI register rule.

        The description starts with the short tag registered for rule.type in
        _DESCR_CFI_REGISTER_RULE_TYPE. OFFSET and VAL_OFFSET rules get their
        argument appended as a signed decimal; REGISTER rules get the name of
        the register given by their argument appended.
    """
    rule_type = rule.type
    tag = _DESCR_CFI_REGISTER_RULE_TYPE[rule_type]

    if rule_type == 'REGISTER':
        return tag + describe_reg_name(rule.arg)
    if rule_type in ('OFFSET', 'VAL_OFFSET'):
        return tag + '%+d' % rule.arg
    return tag
def describe_CFI_CFA_rule(rule):
    """ Textual description of a CFA rule.

        A rule with a (truthy) expression is described simply as 'exp'.
        Otherwise the description is the name of the rule's register followed
        by the rule's offset as a signed decimal.
    """
    if not rule.expr:
        return '%s%+d' % (describe_reg_name(rule.reg), rule.offset)
    return 'exp'
def describe_DWARF_expr(expr, structs):
    """ Textual description of a DWARF expression encoded in 'expr'.

        structs should come from the entity encompassing the expression - it's
        needed to be able to parse it correctly.

        Returns the dumped expression wrapped in parentheses.
    """
    # This function can be called a lot and creating an ExprDumper for each
    # call is expensive, so one dumper is kept per structs object. The cache
    # is keyed by id(structs).
    key = id(structs)
    dumper = _DWARF_EXPR_DUMPER_CACHE.get(key)
    if dumper is None:
        dumper = ExprDumper(structs)
        _DWARF_EXPR_DUMPER_CACHE[key] = dumper

    # The dumper is shared between calls: drop whatever the previous
    # expression left behind before processing this one.
    dumper.clear()
    dumper.process_expr(expr)
    return '(%s)' % dumper.get_str()
def describe_reg_name(regnum, machine_arch=None):
    """ Provide a textual description for a register name, given its serial
        number. The number is expected to be valid.

        machine_arch:
            'x86' or 'x64' to select the register name table. When None, the
            globally configured architecture (_MACHINE_ARCH) is used instead.
            For any other architecture '<none>' is returned.
    """
    arch = _MACHINE_ARCH if machine_arch is None else machine_arch

    if arch == 'x86':
        names = _REG_NAMES_x86
    elif arch == 'x64':
        names = _REG_NAMES_x64
    else:
        return '<none>'
    return names[regnum]
#-------------------------------------------------------------------------------
# The machine architecture. Set globally via set_global_machine_arch and used
# by describe_reg_name whenever no architecture is passed to it explicitly.
# While it is None (or anything other than 'x86' / 'x64'), describe_reg_name
# describes every register as '<none>'.
#
_MACHINE_ARCH = None
def _describe_attr_ref(attr, die, section_offset):
return '<0x%x>' % (attr.value + die.cu.cu_offset)
def _describe_attr_value_passthrough(attr, die, section_offset):
return attr.value
def _describe_attr_hex(attr, die, section_offset):
return '0x%x' % (attr.value)
def _describe_attr_hex_addr(attr, die, section_offset):
return '<0x%x>' % (attr.value)
def _describe_attr_split_64bit(attr, die, section_offset):
low_word = attr.value & 0xFFFFFFFF
high_word = (attr.value >> 32) & 0xFFFFFFFF
return '0x%x 0x%x' % (low_word, high_word)
def _describe_attr_strp(attr, die, section_offset):
    """ Describe an indirect string: the attribute's raw value is shown as the
        string's offset (in hex), followed by the string itself, which is
        taken from the attribute's value.
    """
    string_offset = attr.raw_value
    text = bytes2str(attr.value)
    return '(indirect string, offset: 0x%x): %s' % (string_offset, text)
def _describe_attr_string(attr, die, section_offset):
    """ Describe a string attribute by converting its value with bytes2str.
    """
    text = bytes2str(attr.value)
    return text
def _describe_attr_debool(attr, die, section_offset):
""" To be consistent with readelf, generate 1 for True flags, 0 for False
flags.
"""
return '1' if attr.value else '0'
def _describe_attr_block(attr, die, section_offset):
s = '%s byte block: ' % len(attr.value)
s += ' '.join('%x' % item for item in attr.value)
return s
# Maps the name of an attribute form to the function that describes values of
# that form. Forms that are not listed fall back to passing the value through
# unchanged.
_ATTR_DESCRIPTION_MAP = defaultdict(
    lambda: _describe_attr_value_passthrough, # default_factory
    {
        'DW_FORM_ref1': _describe_attr_ref,
        'DW_FORM_ref2': _describe_attr_ref,
        'DW_FORM_ref4': _describe_attr_ref,
        'DW_FORM_ref8': _describe_attr_split_64bit,
        'DW_FORM_ref_udata': _describe_attr_ref,
        'DW_FORM_ref_addr': _describe_attr_hex_addr,
        'DW_FORM_data4': _describe_attr_hex,
        'DW_FORM_data8': _describe_attr_split_64bit,
        'DW_FORM_addr': _describe_attr_hex,
        'DW_FORM_sec_offset': _describe_attr_hex,
        'DW_FORM_flag': _describe_attr_debool,
        'DW_FORM_data1': _describe_attr_value_passthrough,
        'DW_FORM_data2': _describe_attr_value_passthrough,
        'DW_FORM_sdata': _describe_attr_value_passthrough,
        'DW_FORM_udata': _describe_attr_value_passthrough,
        'DW_FORM_string': _describe_attr_string,
        'DW_FORM_strp': _describe_attr_strp,
        'DW_FORM_block1': _describe_attr_block,
        'DW_FORM_block2': _describe_attr_block,
        'DW_FORM_block4': _describe_attr_block,
        'DW_FORM_block': _describe_attr_block,
    },
)
# Descriptions of the DW_INL_* values; used as the extra information shown
# for DW_AT_inline attributes.
_DESCR_DW_INL = {
    DW_INL_not_inlined: '(not inlined)',
    DW_INL_inlined: '(inlined)',
    DW_INL_declared_not_inlined: '(declared as inline but ignored)',
    DW_INL_declared_inlined: '(declared as inline and inlined)',
}
# Descriptions of the DW_LANG_* values; used as the extra information shown
# for DW_AT_language attributes.
_DESCR_DW_LANG = {
    DW_LANG_C89: '(ANSI C)',
    DW_LANG_C: '(non-ANSI C)',
    DW_LANG_Ada83: '(Ada)',
    DW_LANG_C_plus_plus: '(C++)',
    DW_LANG_Cobol74: '(Cobol 74)',
    DW_LANG_Cobol85: '(Cobol 85)',
    DW_LANG_Fortran77: '(FORTRAN 77)',
    DW_LANG_Fortran90: '(Fortran 90)',
    DW_LANG_Pascal83: '(ANSI Pascal)',
    DW_LANG_Modula2: '(Modula 2)',
    DW_LANG_Java: '(Java)',
    DW_LANG_C99: '(ANSI C99)',
    DW_LANG_Ada95: '(ADA 95)',
    DW_LANG_Fortran95: '(Fortran 95)',
    DW_LANG_PLI: '(PLI)',
    DW_LANG_ObjC: '(Objective C)',
    DW_LANG_ObjC_plus_plus: '(Objective C++)',
    DW_LANG_UPC: '(Unified Parallel C)',
    DW_LANG_D: '(D)',
    DW_LANG_Python: '(Python)',
    DW_LANG_Mips_Assembler: '(MIPS assembler)',
    # Same description as DW_LANG_UPC above (the leading 'U' used to be
    # missing here).
    DW_LANG_Upc: '(Unified Parallel C)',
    DW_LANG_HP_Bliss: '(HP Bliss)',
    DW_LANG_HP_Basic91: '(HP Basic 91)',
    DW_LANG_HP_Pascal91: '(HP Pascal 91)',
    DW_LANG_HP_IMacro: '(HP IMacro)',
    DW_LANG_HP_Assembler: '(HP assembler)',
}
# Descriptions of the DW_ATE_* values; used as the extra information shown
# for DW_AT_encoding attributes.
_DESCR_DW_ATE = {
    DW_ATE_void: '(void)',
    DW_ATE_address: '(machine address)',
    DW_ATE_boolean: '(boolean)',
    DW_ATE_complex_float: '(complex float)',
    DW_ATE_float: '(float)',
    DW_ATE_signed: '(signed)',
    DW_ATE_signed_char: '(signed char)',
    DW_ATE_unsigned: '(unsigned)',
    DW_ATE_unsigned_char: '(unsigned char)',
    DW_ATE_imaginary_float: '(imaginary float)',
    DW_ATE_decimal_float: '(decimal float)',
    DW_ATE_packed_decimal: '(packed_decimal)',
    DW_ATE_numeric_string: '(numeric_string)',
    DW_ATE_edited: '(edited)',
    DW_ATE_signed_fixed: '(signed_fixed)',
    DW_ATE_unsigned_fixed: '(unsigned_fixed)',
    DW_ATE_HP_float80: '(HP_float80)',
    DW_ATE_HP_complex_float80: '(HP_complex_float80)',
    DW_ATE_HP_float128: '(HP_float128)',
    DW_ATE_HP_complex_float128: '(HP_complex_float128)',
    DW_ATE_HP_floathpintel: '(HP_floathpintel)',
    DW_ATE_HP_imaginary_float80: '(HP_imaginary_float80)',
    DW_ATE_HP_imaginary_float128: '(HP_imaginary_float128)',
}
# Descriptions of the DW_ACCESS_* values (extra information for
# DW_AT_accessibility).
_DESCR_DW_ACCESS = {
    DW_ACCESS_public: '(public)',
    DW_ACCESS_protected: '(protected)',
    DW_ACCESS_private: '(private)',
}
# Descriptions of the DW_VIS_* values (extra information for
# DW_AT_visibility).
_DESCR_DW_VIS = {
    DW_VIS_local: '(local)',
    DW_VIS_exported: '(exported)',
    DW_VIS_qualified: '(qualified)',
}
# Descriptions of the DW_VIRTUALITY_* values (extra information for
# DW_AT_virtuality).
_DESCR_DW_VIRTUALITY = {
    DW_VIRTUALITY_none: '(none)',
    DW_VIRTUALITY_virtual: '(virtual)',
    DW_VIRTUALITY_pure_virtual: '(pure virtual)',
}
# Descriptions of the DW_ID_* values (extra information for
# DW_AT_identifier_case).
_DESCR_DW_ID_CASE = {
    DW_ID_case_sensitive: '(case_sensitive)',
    DW_ID_up_case: '(up_case)',
    DW_ID_down_case: '(down_case)',
    DW_ID_case_insensitive: '(case_insensitive)',
}
# Descriptions of the DW_CC_* values (extra information for
# DW_AT_calling_convention).
_DESCR_DW_CC = {
    DW_CC_normal: '(normal)',
    DW_CC_program: '(program)',
    DW_CC_nocall: '(nocall)',
}
# Descriptions of the DW_ORD_* values (extra information for DW_AT_ordering).
_DESCR_DW_ORD = {
    DW_ORD_row_major: '(row major)',
    DW_ORD_col_major: '(column major)',
}
_DESCR_CFI_REGISTER_RULE_TYPE = dict(
UNDEFINED='u',
SAME_VALUE='s',
OFFSET='c',
VAL_OFFSET='v',
REGISTER='',
EXPRESSION='exp',
VAL_EXPRESSION='vexp',
ARCHITECTURAL='a',
)
def _make_extra_mapper(mapping, default, default_interpolate_value=False):
""" Create a mapping function from attribute parameters to an extra
value that should be displayed.
"""
def mapper(attr, die, section_offset):
if default_interpolate_value:
d = default % attr.value
else:
d = default
return mapping.get(attr.value, d)
return mapper
def _make_extra_string(s=''):
""" Create an extra function that just returns a constant string.
"""
def extra(attr, die, section_offset):
return s
return extra
# Cache of ExprDumper objects used by describe_DWARF_expr, keyed by the id()
# of the structs object each dumper was created for.
_DWARF_EXPR_DUMPER_CACHE = {}
def _location_list_extra(attr, die, section_offset):
# According to section 2.6 of the DWARF spec v3, class loclistptr means
# a location list, and class block means a location expression.
#
if attr.form in ('DW_FORM_data4', 'DW_FORM_data8'):
return '(location list)'
else:
return describe_DWARF_expr(attr.value, die.cu.structs)
def _import_extra(attr, die, section_offset):
# For DW_AT_import the value points to a DIE (that can be either in the
# current DIE's CU or in another CU, depending on the FORM). The extra
# information for it is the abbreviation number in this DIE and its tag.
if attr.form == 'DW_FORM_ref_addr':
# Absolute offset value
ref_die_offset = section_offset + attr.value
else:
# Relative offset to the current DIE's CU
ref_die_offset = attr.value + die.cu.cu_offset
# Now find the CU this DIE belongs to (since we have to find its abbrev
# table). This is done by linearly scanning through all CUs, looking for
# one spanning an address space containing the referred DIE's offset.
for cu in die.dwarfinfo.iter_CUs():
if cu['unit_length'] + cu.cu_offset > ref_die_offset >= cu.cu_offset:
# Once we have the CU, we can actually parse this DIE from the
# stream.
with preserve_stream_pos(die.stream):
ref_die = DIE(cu, die.stream, ref_die_offset)
#print '&&& ref_die', ref_die
return '[Abbrev Number: %s (%s)]' % (
ref_die.abbrev_code, ref_die.tag)
return '[unknown]'
# Maps the name of an attribute to the function that produces the extra
# information displayed after the attribute's value. Attributes that are not
# listed get an empty string as their extra information.
_EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
    lambda: _make_extra_string(''), # default_factory

    # Attributes whose value is one of a set of named constants: the extra
    # information is the description of the constant.
    DW_AT_inline=_make_extra_mapper(
        # The closing parenthesis of the default used to be missing.
        _DESCR_DW_INL, '(Unknown inline attribute value: %x)',
        default_interpolate_value=True),
    DW_AT_language=_make_extra_mapper(
        _DESCR_DW_LANG, '(Unknown: %x)', default_interpolate_value=True),
    DW_AT_encoding=_make_extra_mapper(_DESCR_DW_ATE, '(unknown type)'),
    DW_AT_accessibility=_make_extra_mapper(
        _DESCR_DW_ACCESS, '(unknown accessibility)'),
    DW_AT_visibility=_make_extra_mapper(
        _DESCR_DW_VIS, '(unknown visibility)'),
    DW_AT_virtuality=_make_extra_mapper(
        _DESCR_DW_VIRTUALITY, '(unknown virtuality)'),
    DW_AT_identifier_case=_make_extra_mapper(
        _DESCR_DW_ID_CASE, '(unknown case)'),
    DW_AT_calling_convention=_make_extra_mapper(
        _DESCR_DW_CC, '(unknown convention)'),
    DW_AT_ordering=_make_extra_mapper(
        _DESCR_DW_ORD, '(undefined)'),

    # Attributes holding either a location list or a location expression.
    DW_AT_frame_base=_location_list_extra,
    DW_AT_location=_location_list_extra,
    DW_AT_string_length=_location_list_extra,
    DW_AT_return_addr=_location_list_extra,
    DW_AT_data_member_location=_location_list_extra,
    DW_AT_vtable_elem_location=_location_list_extra,
    DW_AT_segment=_location_list_extra,
    DW_AT_static_link=_location_list_extra,
    DW_AT_use_location=_location_list_extra,
    DW_AT_allocated=_location_list_extra,
    DW_AT_associated=_location_list_extra,
    DW_AT_data_location=_location_list_extra,
    DW_AT_stride=_location_list_extra,

    # The extra information is taken from the DIE the attribute refers to.
    DW_AT_import=_import_extra,
)
# Register names, indexed by register number; describe_reg_name looks the
# names up in these lists. Numbers without a name are listed as '<none>'.
#
# 8 in a line, for easier counting
_REG_NAMES_x86 = [
    'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
    'eip', 'eflags', '<none>', 'st0', 'st1', 'st2', 'st3', 'st4',
    'st5', 'st6', 'st7', '<none>', '<none>', 'xmm0', 'xmm1', 'xmm2',
    'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'mm0', 'mm1', 'mm2',
    'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'fcw', 'fsw', 'mxcsr',
    'es', 'cs', 'ss', 'ds', 'fs', 'gs', '<none>', '<none>', 'tr', 'ldtr'
]
# Same layout as above, for the 'x64' architecture.
_REG_NAMES_x64 = [
    'rax', 'rdx', 'rcx', 'rbx', 'rsi', 'rdi', 'rbp', 'rsp',
    'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
    'rip', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6',
    'xmm7', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14',
    'xmm15', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6',
    'st7', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
    'mm7', 'rflags', 'es', 'cs', 'ss', 'ds', 'fs', 'gs',
    '<none>', '<none>', 'fs.base', 'gs.base', '<none>', '<none>', 'tr', 'ldtr',
    'mxcsr', 'fcw', 'fsw'
]
class ExprDumper(GenericExprVisitor):
    """ A concrete visitor for DWARF expressions that collects a textual
        description of every operation of the expression.

        Usage: after creation, call process_expr, and then get_str for a
        semicolon-delimited string representation of the decoded expression.
        Call clear before processing another expression with the same dumper,
        otherwise the new descriptions are appended to the old ones.
    """
    def __init__(self, structs):
        super(ExprDumper, self).__init__(structs)
        self._init_lookups()
        self._str_parts = []

    def clear(self):
        """ Forget the descriptions collected so far.
        """
        self._str_parts = []

    def get_str(self):
        """ The descriptions collected so far, joined by '; '.
        """
        return '; '.join(self._str_parts)

    def _init_lookups(self):
        """ Build the sets of opcode names that decide how the arguments of
            an operation are formatted.
        """
        # Operations with a single argument that is shown as-is.
        decimal_ops = set((
            'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s',
            'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts',
            'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip',
            'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size',
            'DW_OP_xderef_size', 'DW_OP_regx'))
        # DW_OP_breg0 .. DW_OP_breg31
        decimal_ops.update('DW_OP_breg%s' % n for n in range(32))
        self._ops_with_decimal_arg = decimal_ops

        # Operations with two arguments that are shown as-is.
        self._ops_with_two_decimal_args = set((
            'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx',
            'DW_OP_bit_piece'))

        # Operations with a single argument that is shown in hexadecimal.
        self._ops_with_hex_arg = set((
            'DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'))

    def _after_visit(self, opcode, opcode_name, args):
        description = self._dump_to_string(opcode, opcode_name, args)
        self._str_parts.append(description)

    def _dump_to_string(self, opcode, opcode_name, args):
        """ Describe a single operation, given its opcode name and the list
            of its arguments.
        """
        if not args:
            if not opcode_name.startswith('DW_OP_reg'):
                return opcode_name
            # DW_OP_reg<N>: the register number is part of the opcode name.
            regnum = int(opcode_name[len('DW_OP_reg'):])
            return '%s (%s)' % (
                opcode_name, describe_reg_name(regnum, _MACHINE_ARCH))

        if opcode_name in self._ops_with_decimal_arg:
            arg = args[0]
            if opcode_name.startswith('DW_OP_breg'):
                # DW_OP_breg<N>: the register number is part of the opcode
                # name and the argument is shown after the register name.
                regnum = int(opcode_name[len('DW_OP_breg'):])
                return '%s (%s): %s' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH),
                    arg)
            if opcode_name.endswith('regx'):
                # The argument is the register number. Of the names ending
                # with 'regx' only DW_OP_regx is in the one-argument set;
                # DW_OP_bregx is handled with the two-argument operations.
                return '%s: %s (%s)' % (
                    opcode_name, arg, describe_reg_name(arg, _MACHINE_ARCH))
            return '%s: %s' % (opcode_name, arg)

        if opcode_name in self._ops_with_hex_arg:
            return '%s: %x' % (opcode_name, args[0])

        if opcode_name in self._ops_with_two_decimal_args:
            return '%s: %s %s' % (opcode_name, args[0], args[1])

        return '<unknown %s>' % opcode_name