| # Copyright 2014 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Script to discover dependencies and other file information from a build. |
| |
Some files in the image are installed to provide specific functionality; for
example, chrome, shill and bluetoothd each provide functionality that may or
may not be present on a given build. Many other files are dependencies of
those files and must be present in the image for them to work. These
dependencies come from needed shared libraries, executed files and other
configuration files that are read.
| |
| This script currently discovers dependencies between ELF files for libraries |
| required at load time (libraries loaded by the dynamic linker) but not |
| libraries loaded at runtime with dlopen(). It also computes size and file type |
| in several cases to help understand the contents of the built image. |
| """ |
| |
| import json |
| import logging |
| import multiprocessing |
| import os |
| import stat |
| from typing import Union |
| |
| from chromite.third_party import lddtree |
| |
| from chromite.lib import commandline |
| from chromite.lib import filetype |
| from chromite.lib import parseelf |
| from chromite.lib import portage_util |
| |
| |
# Regex to parse Gentoo atoms. This should match the following ebuild names,
# splitting the package name from the version.
# without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# with version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
# Matches a bare "category/package" atom; group 1 is the full atom.
RE_EBUILD_WITHOUT_VERSION = r"^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$"
# Matches "category/package-version[-rN]" (optionally prefixed with "="):
# group 1 is the category/package name and group 2 is the version,
# including the optional "-rN" revision suffix.
RE_EBUILD_WITH_VERSION = (
    r"^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$"
)
| |
| |
def ParseELFWithArgs(args):
    """Adapter calling parseelf.ParseELF with a packed argument tuple.

    multiprocessing.Pool.map passes a single argument to the mapped
    function, so the ParseELF arguments arrive packed in one tuple.

    Returns:
        A 2-tuple of (rel_path, elf) where rel_path is args[1] and elf is
        the result of ParseELF(). Returns None when ParseELF() returns
        None (i.e. the file could not be parsed as an ELF).
    """
    parsed = parseelf.ParseELF(*args)
    if parsed is None:
        return None
    rel_path = args[1]
    return rel_path, parsed
| |
| |
class DepTracker:
    """Tracks dependencies and file information in a root directory.

    This class computes dependencies and other information related to the files
    in the root image.
    """

    def __init__(self, root: Union[str, os.PathLike], jobs: int = 1) -> None:
        """Initialize the tracker for a given rootfs directory.

        Args:
            root: Path to the root directory of the image to inspect.
            jobs: Number of parallel processes used for the expensive ELF
                parsing step. With jobs == 1 no process pool is created.

        Raises:
            ValueError: If root is not a directory. (ValueError subclasses
                Exception, so callers catching the previous generic
                Exception still work.)
        """
        # TODO(vapier): Convert this to Path.
        root = str(root)
        root_st = os.lstat(root)
        if not stat.S_ISDIR(root_st.st_mode):
            raise ValueError("root (%s) must be a directory" % root)
        self._root = root.rstrip("/") + "/"
        self._file_type_decoder = filetype.FileTypeDecoder(root)

        # A wrapper to the multiprocess map function. We avoid launching a pool
        # of processes when jobs is 1 so python exceptions kill the main
        # process, useful for debugging.
        if jobs > 1:
            # Pool is close()d in DepTracker's destructor.
            # pylint: disable=consider-using-with
            self._pool = multiprocessing.Pool(jobs)
            self._imap = self._pool.map
        else:
            self._pool = None
            self._imap = map

        # Per-file information, keyed by path relative to self._root.
        self._files = {}
        # Per-package information, keyed by "category/pf".
        self._ebuilds = {}

        # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
        # to point to the lowest lexicographically file with the same inode.
        self._symlinks = {}
        self._hardlinks = {}

    def __del__(self) -> None:
        """Destructor method to free up self._pool resource."""
        # getattr: _pool may never have been assigned if __init__ raised
        # before reaching the pool setup (e.g. root wasn't a directory).
        if getattr(self, "_pool", None) is not None:
            self._pool.close()

    def Init(self) -> None:
        """Generates the initial list of files.

        Walks the whole root recording one entry per file in self._files
        (with its size), and tracking symlinks and hardlinks in
        self._symlinks and self._hardlinks respectively.
        """
        # First iteration over all the files in root searching for symlinks and
        # non-regular files. Sorted iteration makes hardlink resolution
        # deterministic: the lexicographically first path wins.
        seen_inodes = {}
        for basepath, _, filenames in sorted(os.walk(self._root)):
            for filename in sorted(filenames):
                full_path = os.path.join(basepath, filename)
                rel_path = full_path[len(self._root) :]
                st = os.lstat(full_path)

                file_data = {
                    "size": st.st_size,
                }
                self._files[rel_path] = file_data

                # Track symlinks.
                if stat.S_ISLNK(st.st_mode):
                    link_path = os.readlink(full_path)
                    if link_path and link_path[0] == "/":
                        # Absolute symlinks are interpreted relative to the
                        # image root, not the build host's /.
                        target = link_path.lstrip("/")
                    else:
                        # lddtree's normpath handles a little more cases than
                        # the os.path version. In particular, it handles the
                        # '//' case.
                        target = lddtree.normpath(
                            os.path.join(os.path.dirname(rel_path), link_path)
                        )
                    self._symlinks[rel_path] = target
                    file_data["deps"] = {"symlink": [target]}

                # Track hardlinks.
                if st.st_ino in seen_inodes:
                    self._hardlinks[rel_path] = seen_inodes[st.st_ino]
                    continue
                seen_inodes[st.st_ino] = rel_path

    def SaveJSON(self, filename) -> None:
        """Save the computed information to a JSON file.

        Args:
            filename: The destination JSON file.
        """
        data = {
            "files": self._files,
            "ebuilds": self._ebuilds,
        }
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f)

    def ComputeEbuildDeps(self, sysroot) -> None:
        """Compute the dependencies between ebuilds and files.

        Iterates over the list of ebuilds in the database and annotates the
        files with the ebuilds they are in. For each ebuild installing a file in
        the root, also compute the direct dependencies. Stores the information
        internally.

        Args:
            sysroot: The path to the sysroot, for example "/build/link".
        """
        portage_db = portage_util.PortageDB(sysroot)
        if not os.path.exists(portage_db.db_path):
            logging.warning(
                "PortageDB directory not found: %s", portage_db.db_path
            )
            return

        for pkg in portage_db.InstalledPackages():
            pkg_files = []
            pkg_size = 0
            cpf = "%s/%s" % (pkg.category, pkg.pf)
            for typ, rel_path in pkg.ListContents():
                # We ignore other entries like for example "dir".
                if typ not in (pkg.OBJ, pkg.SYM):
                    continue
                # We ignore files installed in the SYSROOT that weren't copied
                # to the image.
                if rel_path not in self._files:
                    continue
                pkg_files.append(rel_path)
                file_data = self._files[rel_path]
                if "ebuild" in file_data:
                    logging.warning(
                        "Duplicated entry for %s: %s and %s",
                        rel_path,
                        file_data["ebuild"],
                        cpf,
                    )
                file_data["ebuild"] = cpf
                pkg_size += file_data["size"]
            # Ignore packages that don't install any file.
            if not pkg_files:
                continue
            self._ebuilds[cpf] = {
                "size": pkg_size,
                "files": len(pkg_files),
                "atom": "%s/%s" % (pkg.category, pkg.package),
                "version": pkg.version,
            }
        # TODO(deymo): Parse dependencies between ebuilds.

    def ComputeELFFileDeps(self) -> None:
        """Computes the dependencies between files.

        Computes the dependencies between the files in the root directory
        passed during construction. The dependencies are inferred for ELF
        files. The list of dependencies for each file in the passed rootfs
        as a dict(). The result's keys are the relative path of the files
        and the value of each file is a list of dependencies. A dependency
        is a tuple (dep_path, dep_type) where the dep_path is relative path
        from the passed root to the dependent file and dep_type is one of
        the following strings stating how the dependency was discovered:
            'ldd': The dependent ELF file is listed as needed in the dynamic
                section.
            'symlink': The dependent file is a symlink to the depending.
        If there are dependencies of a given type whose target file wasn't
        determined, a tuple (None, dep_type) is included. This is the case,
        for example, when a program uses a library that wasn't found.
        """
        ldpaths = lddtree.LoadLdpaths(self._root)

        # Collect the regular, non-linked files that are candidates for ELF
        # parsing; symlinks and hardlinks are resolved through their targets.
        parseelf_args = []
        for rel_path, file_data in self._files.items():
            if rel_path in self._symlinks or rel_path in self._hardlinks:
                continue

            full_path = os.path.join(self._root, rel_path)
            st = os.lstat(full_path)
            if not stat.S_ISREG(st.st_mode):
                continue
            parseelf_args.append((self._root, rel_path, ldpaths))

        # Parallelize the ELF lookup step since it is quite expensive.
        elfs = dict(
            result
            for result in self._imap(ParseELFWithArgs, parseelf_args)
            if result is not None
        )

        for rel_path, elf in elfs.items():
            file_data = self._files[rel_path]
            # Fill in the ftype if not set yet. We complete this value at this
            # point to avoid re-parsing the ELF file later.
            if "ftype" not in file_data:
                ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
                if ftype:
                    file_data["ftype"] = ftype

            file_deps = file_data.get("deps", {})
            # Dependencies based on the result of ldd.
            for lib in elf.get("needed", []):
                lib_path = elf["libs"][lib]["path"]
                file_deps.setdefault("ldd", []).append(lib_path)

            if file_deps:
                file_data["deps"] = file_deps

    def ComputeFileTypes(self) -> None:
        """Computes all the missing file type for the files in the root."""
        for rel_path, file_data in self._files.items():
            if "ftype" in file_data:
                continue
            ftype = self._file_type_decoder.GetType(rel_path)
            if ftype:
                file_data["ftype"] = ftype
| |
| |
def ParseArgs(argv):
    """Parse the command line arguments.

    Args:
        argv: List of command line arguments to parse.

    Returns:
        A frozen options namespace.
    """
    arg_parser = commandline.ArgumentParser(description=__doc__, jobs=True)

    arg_parser.add_argument(
        "--sysroot",
        type="str_path",
        metavar="SYSROOT",
        help="parse portage DB for ebuild information from the provided "
        "sysroot.",
    )
    arg_parser.add_argument(
        "--json", type="str_path", help="store information in JSON file."
    )
    arg_parser.add_argument(
        "root",
        type="str_path",
        help="path to the directory where the rootfs is mounted.",
    )

    options = arg_parser.parse_args(argv)
    options.Freeze()
    return options
| |
| |
def main(argv) -> None:
    """Entry point: compute file information for the requested rootfs."""
    options = ParseArgs(argv)
    logging.debug("Options are %s", options)

    tracker = DepTracker(options.root, jobs=options.jobs)
    tracker.Init()

    tracker.ComputeELFFileDeps()
    tracker.ComputeFileTypes()

    if options.sysroot:
        tracker.ComputeEbuildDeps(options.sysroot)

    if options.json:
        tracker.SaveJSON(options.json)