# blob: 849c968cd8875e35d221c8ac9230c4e7ab55a7ca [file] [log] [blame]
# Copyright 2014 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""ELF parsing related helper functions/classes."""
import io
import os
import struct
from typing import Union
from chromite.third_party import lddtree
from chromite.third_party.pyelftools.elftools.common import utils
from chromite.third_party.pyelftools.elftools.elf import elffile
def GetSymbolTableSize(elf):
    """Get the dynamic symbol table size by parsing the section headers.

    Args:
        elf: An object exposing the ELF header field "e_shnum" by subscript
            and a _get_section_header(index) method (e.g. an
            elffile.ELFFile instance).

    Returns:
        The "sh_size" of the first section header whose "sh_type" is
        "SHT_DYNSYM", or 0 when no such section header exists.
    """
    # Headers are produced lazily so the scan stops at the first match.
    # pylint: disable=protected-access
    headers = map(elf._get_section_header, range(elf["e_shnum"]))
    dynsym_sizes = (
        header["sh_size"]
        for header in headers
        if header["sh_type"] == "SHT_DYNSYM"
    )
    return next(dynsym_sizes, 0)
def ParseELFSymbols(elf):
    """Collect the dynamic symbols an ELF file imports and exports.

    Only ET_DYN and ET_EXEC files are inspected. For each PT_DYNAMIC segment
    the dynamic symbol table is located through the DT_SYMTAB tag and its
    entries are read directly from the file stream.

    Args:
        elf: An elffile.ELFFile instance.

    Returns:
        A 2-tuple of (imported, exported) symbols, each of which is a set.
        Imported symbols are the global undefined ones; exported symbols are
        the defined, non-local ones with default visibility.
    """
    imported = set()
    exported = set()

    if elf.header.e_type not in ("ET_DYN", "ET_EXEC"):
        return imported, exported

    dynamic_segments = (
        seg for seg in elf.iter_segments() if seg.header.p_type == "PT_DYNAMIC"
    )
    for segment in dynamic_segments:
        # Look up the virtual addresses of the symbol table and of the
        # (optional) SysV hash table among the dynamic tags.
        symtab_addr = None
        hash_addr = None
        entry_size = elf.structs.Elf_Sym.sizeof()
        for tag in segment.iter_tags():
            entry = tag.entry
            kind = entry.d_tag
            if kind == "DT_SYMTAB":
                symtab_addr = entry.d_ptr
            elif kind == "DT_SYMENT":
                # The entry size recorded in the file must agree with the
                # size of the Elf_Sym structure used for parsing.
                assert entry_size == entry.d_val
            elif kind == "DT_HASH":
                hash_addr = entry.d_ptr

        # pylint: disable=protected-access
        names = segment._get_stringtable()

        table_start = next(elf.address_offsets(symtab_addr))

        if hash_addr:
            # DT_SYMTAB provides no information on the number of symbol table
            # entries. Instead, we use DT_HASH's nchain value, which according
            # to the spec, "should equal the number of symbol table entries".
            # nchain is the second 32-bit integer at the address pointed by
            # DT_HASH, both for ELF and ELF64 formats.
            byte_order = "<" if elf.little_endian else ">"
            elf.stream.seek(next(elf.address_offsets(hash_addr + 4)))
            (symbol_count,) = struct.unpack(
                byte_order + "I", elf.stream.read(4)
            )
        else:
            # No DT_HASH tag: fall back to the size of the DYNSYM section
            # taken from the section headers.
            symbol_count = int(GetSymbolTableSize(elf)) // entry_size

        # Entry 0 is always the local, undefined, unnamed symbol; skip it.
        for index in range(1, symbol_count):
            sym = utils.struct_parse(
                elf.structs.Elf_Sym,
                elf.stream,
                table_start + (index * entry_size),
            )
            binding = sym["st_info"]["bind"]
            if binding == "STB_LOCAL":
                # Local symbols are neither imported nor exported.
                continue

            name = names.get_string(sym.st_name)
            if sym["st_shndx"] == "SHN_UNDEF":
                # Global undefined --> required symbol. Weak undefined
                # symbols are deliberately ignored.
                if binding == "STB_GLOBAL":
                    imported.add(name)
            elif sym["st_other"]["visibility"] == "STV_DEFAULT":
                # Exported symbols must have default visibility.
                exported.add(name)

    return imported, exported
def ParseELF(
    root: Union[str, os.PathLike], rel_path, ldpaths=None, parse_symbols=False
):
    """Load and parse an ELF file located under a rootfs.

    Args:
        root: Path to the directory where the rootfs is mounted.
        rel_path: The path to the parsing file relative to root.
        ldpaths: The dict() with the ld path information. See
            lddtree.LoadLdpaths() for details. Loaded from root when None.
        parse_symbols: Whether the result includes the dynamic symbols
            'imp_sym' and 'exp_sym' sections. Disabling it reduces the time
            for large files with many symbols.

    Returns:
        None: If the passed file isn't a supported ELF file.
        dict: Otherwise, contains information about the parsed ELF. For
            relocatable (ET_REL) files only 'type' and 'realpath' are set.
    """
    # TODO(vapier): Convert to Path instead.
    # Normalize root to end with exactly one "/" so that stripping the root
    # prefix from a path also strips the leading "/" of the remainder.
    root = str(root).rstrip("/") + "/"
    full_path = os.path.join(root, rel_path)

    with open(full_path, "rb") as elf_file:
        # Cheap magic-number check so non-ELF files are skipped quickly.
        if elf_file.read(4) != b"\x7fELF":
            return None
        # Cache the whole file in memory to speed up later seeks.
        elf_file.seek(0)
        stream = io.BytesIO(elf_file.read())

    try:
        elf = elffile.ELFFile(stream)
    except elffile.ELFError:
        # Ignore unsupported ELF files.
        return None

    elf_type = elf.header.e_type
    if elf_type == "ET_REL":
        # Don't parse relocatable ELF files (mostly kernel modules).
        return {
            "type": elf_type,
            "realpath": rel_path,
        }

    if ldpaths is None:
        ldpaths = lddtree.LoadLdpaths(root)
    result = lddtree.ParseELF(full_path, root=root, ldpaths=ldpaths)

    def strip_root(entry, keys):
        """Rewrite the given path entries in place, relative to root."""
        for key in keys:
            value = entry[key]
            if value is not None and value.startswith(root):
                entry[key] = value[len(root) :]

    # Convert files to relative paths.
    for libdef in result["libs"].values():
        strip_root(libdef, ("realpath", "path"))
    strip_root(result, ("interp", "realpath"))

    result["type"] = elf.header.e_type
    result["sections"] = {
        str(section.name): section["sh_size"]
        for section in elf.iter_sections()
    }
    result["segments"] = {
        segment["p_type"] for segment in elf.iter_segments()
    }

    # Some libraries (notably, the libc, which you can execute as a normal
    # binary) have the interp set. We use the file extension in those cases
    # because exec files shouldn't have a .so extension.
    looks_like_lib = result["interp"] is None or rel_path[-3:] == ".so"
    result["is_lib"] = looks_like_lib and elf.header.e_type == "ET_DYN"

    if parse_symbols:
        imported, exported = ParseELFSymbols(elf)
        result["imp_sym"] = imported
        result["exp_sym"] = exported
    return result