| # Copyright 2014 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ELF parsing related helper functions/classes.""" |
| |
| import io |
| import os |
| import struct |
| from typing import Union |
| |
| from chromite.third_party import lddtree |
| from chromite.third_party.pyelftools.elftools.common import utils |
| from chromite.third_party.pyelftools.elftools.elf import elffile |
| |
| |
| def GetSymbolTableSize(elf): |
| """Get Symbole Table size by parsing section header.""" |
| for i in range(elf["e_shnum"]): |
| # pylint: disable=protected-access |
| section_header = elf._get_section_header(i) |
| if section_header["sh_type"] == "SHT_DYNSYM": |
| return section_header["sh_size"] |
| return 0 |
| |
| |
| def ParseELFSymbols(elf): |
| """Parses list of symbols in an ELF file. |
| |
| Args: |
| elf: An elffile.ELFFile instance. |
| |
| Returns: |
| A 2-tuple of (imported, exported) symbols, each of which is a set. |
| """ |
| imp = set() |
| exp = set() |
| |
| if elf.header.e_type not in ("ET_DYN", "ET_EXEC"): |
| return imp, exp |
| |
| for segment in elf.iter_segments(): |
| if segment.header.p_type != "PT_DYNAMIC": |
| continue |
| |
| # Find strtab and symtab virtual addresses. |
| symtab_ptr = None |
| dthash_ptr = None |
| symbol_size = elf.structs.Elf_Sym.sizeof() |
| for tag in segment.iter_tags(): |
| if tag.entry.d_tag == "DT_SYMTAB": |
| symtab_ptr = tag.entry.d_ptr |
| if tag.entry.d_tag == "DT_SYMENT": |
| assert symbol_size == tag.entry.d_val |
| if tag.entry.d_tag == "DT_HASH": |
| dthash_ptr = tag.entry.d_ptr |
| |
| stringtable = ( |
| segment._get_stringtable() # pylint: disable=protected-access |
| ) |
| |
| symtab_offset = next(elf.address_offsets(symtab_ptr)) |
| |
| if dthash_ptr: |
| # DT_SYMTAB provides no information on the number of symbols table |
| # entries. Instead, we use DT_HASH's nchain value, which according |
| # to the spec, "should equal the number of symbol table entries". |
| # nchain is the second 32-bit integer at the address pointed by |
| # DT_HASH, both for ELF and ELF64 formats. |
| fmt = "<I" if elf.little_endian else ">I" |
| nchain_offset = next(elf.address_offsets(dthash_ptr + 4)) |
| elf.stream.seek(nchain_offset) |
| nsymbols = struct.unpack(fmt, elf.stream.read(4))[0] |
| else: |
| # Get the size of DYNSYM section from section header. |
| symtab_size = int(GetSymbolTableSize(elf)) |
| nsymbols = symtab_size // symbol_size |
| |
| # The first symbol is always local undefined, unnamed so we ignore it. |
| for i in range(1, nsymbols): |
| symbol_offset = symtab_offset + (i * symbol_size) |
| symbol = utils.struct_parse( |
| elf.structs.Elf_Sym, elf.stream, symbol_offset |
| ) |
| if symbol["st_info"]["bind"] == "STB_LOCAL": |
| # Ignore local symbols. |
| continue |
| symbol_name = stringtable.get_string(symbol.st_name) |
| if symbol["st_shndx"] == "SHN_UNDEF": |
| if symbol["st_info"]["bind"] == "STB_GLOBAL": |
| # Global undefined --> required symbols. |
| # We ignore weak undefined symbols. |
| imp.add(symbol_name) |
| elif symbol["st_other"]["visibility"] == "STV_DEFAULT": |
| # Exported symbols must have default visibility. |
| exp.add(symbol_name) |
| |
| return imp, exp |
| |
| |
| def ParseELF( |
| root: Union[str, os.PathLike], rel_path, ldpaths=None, parse_symbols=False |
| ): |
| """Parse the ELF file. |
| |
| Loads and parses the passed elf file. |
| |
| Args: |
| root: Path to the directory where the rootfs is mounted. |
| rel_path: The path to the parsing file relative to root. |
| ldpaths: The dict() with the ld path information. See |
| lddtree.LoadLdpaths() for details. |
| parse_symbols: Whether the result includes the dynamic symbols 'imp_sym' |
| and 'exp_sym' sections. Disabling it reduces the time for large |
| files with many symbols. |
| |
| Returns: |
| None: If the passed file isn't a supported ELF file. |
| dict: Otherwise, contains information about the parsed ELF. |
| """ |
| # TODO(vapier): Convert to Path instead. |
| root = str(root) |
| |
| # Ensure root has a trailing / so removing the root prefix also removes any |
| # / from the beginning of the path. |
| root = root.rstrip("/") + "/" |
| |
| with open(os.path.join(root, rel_path), "rb") as f: |
| if f.read(4) != b"\x7fELF": |
| # Ignore non-ELF files. This check is done to speedup the process. |
| return |
| f.seek(0) |
| # Continue reading and cache the whole file to speedup seeks. |
| stream = io.BytesIO(f.read()) |
| |
| try: |
| elf = elffile.ELFFile(stream) |
| except elffile.ELFError: |
| # Ignore unsupported ELF files. |
| return |
| if elf.header.e_type == "ET_REL": |
| # Don't parse relocatable ELF files (mostly kernel modules). |
| return { |
| "type": elf.header.e_type, |
| "realpath": rel_path, |
| } |
| |
| if ldpaths is None: |
| ldpaths = lddtree.LoadLdpaths(root) |
| |
| result = lddtree.ParseELF( |
| os.path.join(root, rel_path), root=root, ldpaths=ldpaths |
| ) |
| # Convert files to relative paths. |
| for libdef in result["libs"].values(): |
| for path in ("realpath", "path"): |
| if not libdef[path] is None and libdef[path].startswith(root): |
| libdef[path] = libdef[path][len(root) :] |
| |
| for path in ("interp", "realpath"): |
| if not result[path] is None and result[path].startswith(root): |
| # pylint: disable=unsubscriptable-object |
| result[path] = result[path][len(root) :] |
| |
| result["type"] = elf.header.e_type |
| result["sections"] = dict( |
| (str(sec.name), sec["sh_size"]) for sec in elf.iter_sections() |
| ) |
| result["segments"] = set(seg["p_type"] for seg in elf.iter_segments()) |
| |
| # Some libraries (notably, the libc, which you can execute as a normal |
| # binary) have the interp set. We use the file extension in those cases |
| # because exec files shouldn't have a .so extension. |
| result["is_lib"] = ( |
| result["interp"] is None or rel_path[-3:] == ".so" |
| ) and elf.header.e_type == "ET_DYN" |
| |
| if parse_symbols: |
| result["imp_sym"], result["exp_sym"] = ParseELFSymbols(elf) |
| return result |