| # -*- coding: utf-8 -*- |
| # Copyright 2014 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Script to discover dependencies and other file information from a build. |
| |
Some files in the image are installed to provide particular functionality: for
example, chrome, shill or bluetoothd each provide functionality that may or
may not be present on a given build. Many other files are dependencies of
those files and need to be present in the image for them to work. These
dependencies arise from needed shared libraries, executed files and other
configuration files that are read.
| |
| This script currently discovers dependencies between ELF files for libraries |
| required at load time (libraries loaded by the dynamic linker) but not |
| libraries loaded at runtime with dlopen(). It also computes size and file type |
| in several cases to help understand the contents of the built image. |
| """ |
| |
| from __future__ import print_function |
| |
| import itertools |
| import json |
| import multiprocessing |
| import os |
| import stat |
| |
| from chromite.lib import commandline |
| from chromite.lib import cros_logging as logging |
| from chromite.lib import filetype |
| from chromite.lib import parseelf |
| from chromite.lib import portage_util |
| from chromite.scripts import lddtree |
| |
| |
# Regex to parse Gentoo atoms. This should match the following ebuild names,
# splitting the package name from the version.
# without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# with version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
# In both regexes, group 1 captures the "category/package" name; in the
# versioned form, group 2 captures the version (including an optional -rN
# revision suffix, which group 3 matches on its own).
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
| |
| |
def ParseELFWithArgs(args):
  """Wrapper to parseelf.ParseELF accepting a single arg.

  This wrapper is required to use multiprocessing.Pool.map function.

  Args:
    args: Tuple of positional arguments forwarded to parseelf.ParseELF();
      args[1] is the relative path of the file being parsed.

  Returns:
    A 2-tuple with the passed relative path and the result of ParseELF(). On
    error, when ParseELF() returns None, this function returns None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
| |
| |
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image.
  """

  def __init__(self, root, jobs=1):
    """Initializes the tracker for the given rootfs directory.

    Args:
      root: Path to the directory where the rootfs is mounted.
      jobs: Number of parallel processes used to parse ELF files.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      # The builtin map() exists on both Python 2 and 3 (unlike
      # itertools.imap, which is Python 2 only). Callers consume the whole
      # result, so the py2 list vs. py3 iterator difference doesn't matter.
      self._imap = map

    # Maps each file's path (relative to |root|) to a dict of computed data:
    # 'size' always, plus 'deps', 'ftype' and 'ebuild' when discovered.
    self._files = {}
    # Maps "category/package-version" to size/files/atom/version info.
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lowest lexicographically file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files."""
    # First iteration over all the files in root searching for symlinks and
    # non-regular files. Sorting the walk makes the hardlink canonicalization
    # below deterministic (lowest lexicographic path wins).
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          # lddtree's normpath handles a little more cases than the os.path
          # version. In particular, it handles the '//' case.
          self._symlinks[rel_path] = (
              link_path.lstrip('/') if link_path and link_path[0] == '/' else
              lddtree.normpath(os.path.join(os.path.dirname(rel_path),
                                            link_path)))
          file_data['deps'] = {
              'symlink': [self._symlinks[rel_path]]
          }

        # Track hardlinks.
        if st.st_ino in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[st.st_ino]
          continue
        seen_inodes[st.st_ino] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file. Overwritten if it already exists.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the file is flushed and closed even if
    # json.dump() raises; the previous open() call leaked the handle.
    with open(filename, 'w') as outfile:
      json.dump(data, outfile)

  def ComputeEbuildDeps(self, sysroot):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the root,
    also compute the direct dependencies. Stores the information internally.

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files.
    The list of dependencies for each file in the passed rootfs as a dict().
    The result's keys are the relative path of the files and the value of each
    file is a list of dependencies. A dependency is a tuple (dep_path,
    dep_type) where the dep_path is relative path from the passed root to the
    dependent file and dep_type is one the following strings stating how the
    dependency was discovered:
      'ldd': The dependent ELF file is listed as needed in the dynamic section.
      'symlink': The dependent file is a symlink to the depending.
    If there are dependencies of a given type whose target file wasn't
    determined, a tuple (None, dep_type) is included. This is the case for
    example if a program uses a library that wasn't found.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Skip symlinks, hardlinks and non-regular files: only canonical regular
    # files can be ELF objects worth parsing.
    parseelf_args = []
    for rel_path, file_data in self._files.items():
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        if 'ldd' not in file_deps:
          file_deps['ldd'] = []
        file_deps['ldd'].append(lib_path)

      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
| |
| |
def ParseArgs(argv):
  """Return parsed commandline arguments."""

  parser = commandline.ArgumentParser()

  # Tuning knobs.
  parser.add_argument('-j', '--jobs',
                      type=int,
                      default=multiprocessing.cpu_count(),
                      help='number of simultaneous jobs.')
  # Optional inputs and outputs.
  parser.add_argument('--sysroot',
                      type='path',
                      metavar='SYSROOT',
                      help='parse portage DB for ebuild information from the '
                           'provided sysroot.')
  parser.add_argument('--json',
                      type='path',
                      help='store information in JSON file.')

  # Required positional argument: the rootfs to inspect.
  parser.add_argument('root',
                      type='path',
                      help='path to the directory where the rootfs is '
                           'mounted.')

  options = parser.parse_args(argv)
  options.Freeze()
  return options
| |
| |
def main(argv):
  """Main function to start the script."""
  opts = ParseArgs(argv)
  logging.debug('Options are %s', opts)

  tracker = DepTracker(opts.root, jobs=opts.jobs)
  tracker.Init()

  # ELF dependencies and file types are always computed.
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Ebuild annotation and JSON output are opt-in via flags.
  if opts.sysroot:
    tracker.ComputeEbuildDeps(opts.sysroot)

  if opts.json:
    tracker.SaveJSON(opts.json)