blob: 1191505a6c257348faf317f8997f8df31f283489 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# TODO?: recursively look in packages to see if they have license files not
# at their top level.
# FIXME(merlin): remove this after fixing the current code.
# pylint: disable-msg=W0621
"""Script that attempts to generate an HTML file containing license
information and homepage links for all installed packages.
WARNING: this script in its current form is not finished or considered
production quality/code style compliant. This is an intermediate checkin
to allow for incremental cleanups and improvements that will make it
production quality.
For this script to work, you must have built the architecture
this is being run against, _after_ you've last run repo sync.
Otherwise, it will query newer source code and then fail to work on packages
that are out of date in your build.
Recommended build:
export board=x86-alex
sudo rm -rf /build/$board
cd ~/trunk/src/scripts
./setup_board --board=$board
./build_packages --board=$board --nowithautotest --nowithtest --nowithdev
cd ~/trunk/chromite/scripts/license-generation
./ --debug $board out.html | tee output.sav
The script is still experimental. Review at least ERROR output from it.
The output file is meant to update +
(gclient config svn://
For an example CL, see
It is recommended that you use a fancy differ like 'meld' to review license
diffs. GNU diff will show too much irrelevant noise and not resync properly.
UPDATE: gcl will probably fail now, because the file is too big. Before it
gets moved somewhere else, you should just use svn diff and svn commit.
If you don't get this in before the freeze window, it'll need to be merged into
the branch being released, which is done by adding a Merge-Requested label to
Iteration-xx in the tracking bug.
Once it's been updated to "Merge-Approved" by a TPM, please merge into the
required release branch. You can ask karen@ for merge approval help.
"""
import cgi
import logging
import os
import portage
import subprocess
import sys
# This keeps track of whether we have an incomplete license file due to package
# errors during parsing.
# Any non empty list at the end shows the list of packages that caused errors.
EQUERY_BASE = '/usr/local/bin/equery-%s'
os.path.abspath(os.path.join(os.path.dirname(__file__), 'licenses')),
# Virtual packages don't need to have a license and often don't, so we skip them
# chromeos-base contains google platform packages that are covered by the
# general license at top of tree, so we skip those too.
'chromeos-base', # TODO: this shouldn't be excluded.
# Fix these packages by adding a real license in the code.
# You should not skip packages just because the license scraping doesn't
# work. Stick those special cases into PACKAGE_LICENSES.
# Packages should only be here because they are sub/split packages already
# covered by the license of the main package.
# These are Chrome-OS-specific packages, copyright BSD-Google
'sys-kernel/chromeos-kernel', # already manually credit Linux
# These have been split across several packages, so we skip listing the
# individual components (and just list the main package instead).
# Portage metapackage.
# These are covered by app-i18n/ibus-mozc (BSD, copyright Google).
# These are all sub-packages; shouldn't be any need to list them
# individually.
ARCHIVE_SUFFIXES = [ '.tar.gz', '.tgz', '.tar.bz2', '.tbz2' ]
# TODO(merlin): Normalize case in comparison check without mangling the case
# Matching is done in lowercase, you MUST give lowercase names.
'licence', # used by openssh
'license.txt', # used by hdparm, used by NumPy, glew
'ipa_font_license_agreement_v1.0.txt', # used by ja-ipafonts
# FIXME: check whether this should be excluded.
# These are _temporary_ license mappings for packages that do not have a valid
# stock license, or LICENSE file we can use.
# Once this script runs earlier (during the package build process), it will
# block new source without a LICENSE file.
# At that point, new packages will get fixed to include LICENSE instead of
# adding workaround mappings like those below.
# We should also fix the packages listed below so that the hardcoded
# mappings can be obsoleted.
# One off licenses. Should we check in a custom LICENSE file in upstream?
'dev-python/netifaces': ['netiface'],
'net-dialup/ppp': ['ppp-2.4.4'],
'sys-libs/ncurses': ['ncurses'],
# BSD and MIT license authorship mapping.
# Ideally we should have a custom LICENSE file in the upstream source.
# TODO: BSD-2: bsdiff is missing a license file, add one upstream.
'dev-util/bsdiff': ['BSD-bsdiff'],
# TODO: libevent is missing a license file, add one upstream.
'dev-libs/libevent': ['BSD-libevent'],
# TODO: dhcpcd is missing a license file, (c) in README. Add one upstream.
'net-misc/dhcpcd': ['BSD-dhcpcd'],
# TODO: iputils is missing a license file, add one upstream.
'net-misc/iputils': ['BSD-iputils'],
# TODO: c-ares is missing a license file, add one upstream.
'net-dns/c-ares': ['MIT-MIT'],
# TODO: We should just check in a LICENSE file in all of these:
'app-i18n/input-tools': ['BSD-Google'],
'app-i18n/nacl-mozc': ['BSD-Google'],
'app-i18n/ibus-mozc': ['BSD-Google'],
'media-plugins/o3d': ['BSD-Google'],
'dev-python/unittest2': ['BSD-Google'],
# These packages are not in Alex, check and remove later (might be used in
# other platforms).
#'media-libs/freeimage': ['GPL-2'],
#'sys-libs/talloc': ['LGPL-3'], # ebuild incorrectly says GPL-3
#'app-crypt/nss': ['MPL-1.1'],
#'media-libs/jpeg': ['jpeg'],
#'app-editors/gentoo-editor': ['MIT-gentoo-editor'],
# 'media-fonts/font-util': ['font-util'], # COPYING file from git repo
# 'net-wireless/iwl1000-ucode': ['Intel-iwl1000'],
# 'sys-process/vixie-cron': ['vixie-cron'],
'BSD', # requires distribution of copyright notice
'BSD-2',# so does BSD-2
'BSD-3',# and BSD-3 ?
'MIT', # requires distribution of copyright notice
'MIT-with-advertising', # requires distribution of copyright notice
'app-editors/vim': [''],
'x11-proto/glproto': [''],
TEMPLATE_FILE = 'about_credits.tmpl'
ENTRY_TEMPLATE_FILE = 'about_credits_entry.tmpl'
class PackageInfo:
  """Identity, metadata and license text of a single package.

  Attributes:
    category, name, version: package identity, e.g. 'net-misc', 'wget', '1.12'.
    revision: revision number as a string without the leading 'r', or None
      when there is no revision (or the revision is r0).
    description, homepages, license_names: filled in by GetPackageInfo.
    license_text: filled in by ExtractLicense or GetStockLicense.

  Several methods read the module-level globals `board` and `debug` that the
  script's __main__ section sets.
  """

  def __init__(self, category=None, name=None, version=None, revision=None):
    self.category = category
    self.name = name
    self.version = version
    if revision is not None:
      # Accept both 'r2' and '2'; revision 0 is treated as "no revision" so
      # that it never shows up in fullnamerev.
      revision = str(revision).lstrip('r')
      if revision == '0':
        revision = None
    self.revision = revision
    self.description = None
    self.homepages = []
    self.license_names = []
    self.license_text = None

  @property
  def fullnamerev(self):
    """Return 'category/name-version', plus '-rN' when a revision is set."""
    s = '%s-%s' % (self.fullname, self.version)
    if self.revision:
      s += '-r%s' % self.revision
    return s

  @property
  def fullname(self):
    """Return 'category/name'."""
    return '%s/%s' % (self.category, self.name)

  def _RunEbuildPhases(self, path, *phases, **kwargs):
    """Run the given ebuild phases for the ebuild file at |path|.

    Receives something like:
      path = path to a .ebuild file
      phases = ['clean', 'fetch'] or ['unpack'].
    Extra keyword arguments are forwarded to the subprocess call.
    """
    #logging.debug('ebuild-%s | %s | %s', board, path, str(list(phases)))
    # NOTE(review): the name of the callee was lost in the copy this was
    # restored from; check_call is assumed so that a failing phase raises.
    subprocess.check_call(
        ['ebuild-%s' % board, path] + list(phases), **kwargs)

  def ExtractLicense(self):
    """Try to get a license from the package by unpacking it with ebuild
    and looking for license files in the unpacked tree.

    Returns:
      True if license text was found in the source and stored in
      self.license_text; False if the package has a hardcoded mapping in
      PACKAGE_LICENSES or no license file was found.
    """
    # Some packages have hardcoded licenses and are generated in
    # GetPackageInfo, so we skip the license extraction and exit early.
    # FIXME, this is wrong: a hardcoded license now should mean that we're
    # adding a license to whatever can also be found in the source.
    # Here's an example:
    # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2
    # We need to both get the substitution file for BSD, and display the GPL
    # license.
    # The new code will display both the licenses mapped manually, and the ones
    # found in the source code.
    if self.fullname in PACKAGE_LICENSES:
      return False

    path = GetEbuildPath(board, self.fullnamerev)
    # Silence the noisy clean/fetch phases; the handle is closed afterwards.
    with open(os.devnull, 'wb') as devnull:
      # NOTE(review): one trailing keyword argument of this call was lost;
      # stderr=subprocess.STDOUT is assumed.
      self._RunEbuildPhases(
          path, 'clean', 'fetch',
          stdout=devnull,
          stderr=subprocess.STDOUT)
    self._RunEbuildPhases(path, 'unpack')
    try:
      return self._ExtractLicense()
    finally:
      if not debug:
        # In debug mode, leave unpacked trees so that we can look for files
        # inside them.
        self._RunEbuildPhases(path, 'clean')

  def _ExtractLicense(self):
    """Scan the unpacked source code for what looks like license files
    as defined in LICENSE_FILENAMES.

    Returns:
      True if at least one license file was found (self.license_text is then
      set to the concatenation of all of them), False otherwise.

    Raises:
      AssertionError: portageq or find exited with a non-zero status.
    """
    p = subprocess.Popen(['portageq-%s' % board, 'envvar',
                          'PORTAGE_TMPDIR'], stdout=subprocess.PIPE)
    tmpdir = p.communicate()[0].strip()
    if p.returncode != 0:
      raise AssertionError('exit code was not 0: got %s' % p.returncode)

    # tmpdir gets something like /build/daisy/tmp/
    workdir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work')

    args = ['find', workdir + '/', '-maxdepth', '3',
            '-mindepth', '1', '-type', 'f']
    p = subprocess.Popen(args, stdout=subprocess.PIPE)
    files = p.communicate()[0].splitlines()
    if p.returncode != 0:
      raise AssertionError('exit code was not 0: got %s' % p.returncode)

    # Make the paths relative to workdir.
    files = [x[len(workdir):].lstrip('/') for x in files]

    licenses = []
    for name in files:
      depth = name.count('/')
      # Only accept <topdir>/<file> or <topdir>/doc/<file>.
      well_placed = depth == 1 or (depth == 2 and
                                   name.split('/')[1] == 'doc')
      if not (os.path.basename(name).lower() in LICENSE_FILENAMES and
              well_placed):
        continue
      # FIXME: Should we really exclude third_party?
      # NOTE(review): the loop header was lost; SKIPPED_LICENSE_DIRECTORIES is
      # the assumed name of the module-level list of path components to skip.
      if not any(comp in name for comp in SKIPPED_LICENSE_DIRECTORIES):
        licenses.append(name)

    if not licenses:
      logging.warning("%s: couldn't find license file in %s",
                      self.fullnamerev, workdir)
      return False

    # Examples of multiple license matches:
    # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE
    # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING
    # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING
    # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING
    # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE
    # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING
    # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING
    #                               strongswan-5.0.2/LICENSE
    # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING
    logging.info('License(s) for %s: %s', self.fullnamerev, ' '.join(licenses))

    sections = []
    for license_file in licenses:
      license_path = os.path.join(workdir, license_file)
      logging.debug("Adding license %s:", license_path)
      with open(license_path) as f:
        sections.append("Source license %s:\n\n%s\n\n" %
                        (license_file, f.read()))
    self.license_text = ''.join(sections)
    return True

  # See if the ebuild file itself contains a license, in case there is no text
  # license in the source code.
  def GetStockLicense(self):
    """Load the text of the stock licenses named in self.license_names.

    Each name is looked up as a file in every directory of STOCK_LICENSE_DIRS.

    Returns:
      True if at least one stock license was read (self.license_text is then
      set), False if there are no license names or none could be found.
    """
    if not self.license_names:
      logging.warning('%s: no stock licenses from ebuild', self.fullnamerev)
      return False

    logging.info('%s: using stock license(s) %s',
                 self.fullnamerev, ','.join(self.license_names))

    license_texts = []
    for license_name in self.license_names:
      logging.debug("looking for license %s for %s", license_name,
                    self.fullnamerev)
      license_path = None
      # No early exit: if several directories hold the file, the last one
      # listed wins. NOTE(review): confirm that no `break` was lost here.
      for directory in STOCK_LICENSE_DIRS:
        path = '%s/%s' % (directory, license_name)
        if os.access(path, os.F_OK):
          license_path = path

      if license_path:
        logging.info('%s: reading license %s', self.fullnamerev, license_path)
        license_texts.append("Gentoo Package Provided Stock License %s:" %
                             license_name)
        # NOTE(review): the statements following the header line were lost;
        # appending the file contents is the assumed behavior.
        with open(license_path) as f:
          license_texts.append(f.read())
      else:
        # If a package with multiple stock licenses has one that we don't have,
        # we report this, but it's ok to continue since we only have to honor/
        # repeat one of the licenses. Still, worth looking into just in case.
        # sys-apps/hwids currently has a LICENSE field that triggers this:
        # LICENSE="|| ( GPL-2 BSD )"
        logging.error('%s: stock license %s could not be found in %s',
                      self.fullnamerev, license_name,
                      '\n'.join(STOCK_LICENSE_DIRS))

    if not license_texts:
      logging.error('%s: couldn\'t find any stock licenses', self.fullnamerev)
      return False

    self.license_text = '\n'.join(license_texts)
    return True
def ListInstalledPackages(board):
  """Return a list of all packages installed for a particular board.

  Runs `<EQUERY_BASE % board> list '*'` and returns its stdout, one stripped
  line per list element. The exit status is not checked here.
  """
  # FIXME(merlin): davidjames pointed out that this is
  # not the right way to get the package list as it does not apply
  # filters. This should change to ~/trunk/src/scripts/get_package_list
  args = [EQUERY_BASE % board, 'list', '*']
  p = subprocess.Popen(args, stdout=subprocess.PIPE)
  # communicate() drains stdout and reaps the child; reading p.stdout directly
  # would leave the process un-waited.
  output = p.communicate()[0]
  return [s.strip() for s in output.splitlines()]
def BuildMetaPackages():
  """Return hand-built PackageInfo entries for X.Org and the Linux kernel.

  NOTE(review): this function was restored from a damaged copy. The
  pkgs.append() calls and the body of the final loop were missing and are
  reconstructed; the homepage strings were empty in that copy and are kept
  as-is (the real URLs need to be filled back in).
  """
  pkgs = []

  pkg = PackageInfo('x11-base', 'X.Org', '1.9.3')
  pkg.homepages = [ '' ]
  pkg.license_names = [ 'X' ]
  pkgs.append(pkg)

  pkg = PackageInfo('sys-kernel', 'Linux', '2.6')
  pkg.homepages = [ '' ]
  pkg.license_names = [ 'GPL-2' ]
  pkgs.append(pkg)

  for pkg in pkgs:
    # NOTE(review): assumed loop body. license_names is set above and
    # GetStockLicense() is the method that turns it into license_text.
    pkg.GetStockLicense()

  return pkgs
def GetEbuildPath(board, name):
  """Turns (x86-alex, net-misc/wget-1.12) into the path of the ebuild file.

  The path is whatever `equery-<board> which <name>` prints on stdout.

  Args:
    board: board name, used to pick the equery-<board> wrapper.
    name: package name including version, e.g. net-misc/wget-1.12.

  Returns:
    The stripped stdout of the equery command.

  Raises:
    AssertionError: equery printed nothing.
  """
  # Log before running so the command is visible even if it fails to start.
  logging.debug("equery-%s which %s", board, name)
  p = subprocess.Popen(
      ['equery-%s' % board, 'which', name], stdout=subprocess.PIPE)
  stdout = p.communicate()[0]
  path = stdout.strip()
  logging.debug("  -> %s", path)
  if not path:
    raise AssertionError('GetEbuildPath for %s failed.\n'
                         'Is your tree clean? Delete /build/%s and rebuild' %
                         (name, board))
  return path
def GetPackageInfo(fullnamewithrev):
  """Create a PackageInfo object and populate its license, homepage and
  description if they are valid.

  Reads the module-level global `board` set by the __main__ section.

  Returns a package info object if the package isn't in a skip list.
  A package without license is returned with incomplete data, a return of
  None actually means we don't want to keep track of this package.

  Raises:
    TypeError: portage could not split |fullnamewithrev| (see below).
    AssertionError: portageq failed or printed fewer than two lines.
  """
  # (category, name, version, revision)
  info = PackageInfo(*portage.versions.catpkgsplit(fullnamewithrev))
  # The above will error if portage returns Null because you fed a bad package
  # name, or forgot to append the version number:
  # TypeError: PackageInfo constructor argument after * must be a sequence,
  # not NoneType

  # NOTE(review): the argument of the two logging.info calls below was lost in
  # the copy this was restored from; info.fullname is assumed.
  if info.category in SKIPPED_CATEGORIES:
    logging.info("%s in SKIPPED_CATEGORIES, skip info object creation",
                 info.fullname)
    return None

  if info.fullname in SKIPPED_PACKAGES:
    logging.info("%s in SKIPPED_PACKAGES, skip info object creation",
                 info.fullname)
    return None

  ebuild = GetEbuildPath(board, info.fullnamerev)

  # FIXME(merlin): Is it ok to just return an unprocessed object if the
  # ebuild can't be found? I think not. Consider dying here.
  if not os.access(ebuild, os.F_OK):
    logging.error("Can't access %s", ebuild)
    return info

  # Runs:
  # portageq metadata /build/x86-alex ebuild net-misc/wget-1.12-r2 \
  #                   HOMEPAGE LICENSE DESCRIPTION
  # Returns one line per requested key, e.g.:
  # <homepage url(s)>
  # GPL-3
  # Network utility to retrieve files from the WWW
  # NOTE(review): only the '/build/<board>' element of this list survived; the
  # rest is rebuilt from the comment above and from the order in which the
  # output lines are consumed below.
  cmd = [
      'portageq',
      'metadata',
      '/build/%s' % board,
      'ebuild',
      info.fullnamerev,
      'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
  ]
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  # communicate() waits for the child, so returncode is valid afterwards.
  # Checking returncode without waiting compares None to 0 and always fails.
  output = p.communicate()[0]
  if p.returncode != 0:
    raise AssertionError("%s failed" % cmd)
  lines = [s.strip() for s in output.splitlines()]
  if len(lines) < 2:
    raise AssertionError("%s returned too little output: %r" % (cmd, lines))

  (info.homepages, licenses, info.description) = (
      lines[0].split(), lines[1].split(), lines[2:])

  # NOTE: the ebuild license field can look like:
  # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3)
  # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild
  # LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
  # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild
  # LICENSE="Marvell International Ltd."
  # for net-wireless/marvell_sd8787/marvell_sd8787-
  # The parser does not know the || ( X Y ) OR logic
  # It partially ignores the AND logic by skipping BSD licenses
  # And it craps out on the Marvel license trying to look for
  # 'Marvel' (found), 'International' (not found), 'Ltd' (not found).
  # This blows...
  # Solution: show all licenses listed, ignore AND/OR and ignore errors
  # like not finding an 'Ltd' license (show a warning, but do not die).
  # This might skip a copyright attribution for a BSD license that does not
  # include its own license file. We can fix on case by case basis with override
  # mappings or putting a LICENSE file in the upstream code.
  # TODO(merlin): have a list of licenses tokens to ignore to cut down on
  # warnings '||' '(' ')' 'International' 'Ltd.'

  if info.fullname in PACKAGE_HOMEPAGES:
    info.homepages = PACKAGE_HOMEPAGES[info.fullname]

  # Packages with missing licenses or licenses that need mapping (like BSD/MIT)
  # are hardcoded here:
  if info.fullname in PACKAGE_LICENSES:
    licenses = PACKAGE_LICENSES[info.fullname]
    logging.debug("Static license mapping for %s: %s", info.fullnamerev,
                  ",".join(licenses))
  else:
    logging.debug("Read licenses from ebuild for %s: %s", info.fullnamerev,
                  ",".join(licenses))

  info.license_names = []
  for license_name in licenses:
    # Licenses like BSD or MIT can't be used as is because they do not contain
    # copyright info. They have to be replaced by a custom file generated by us.
    if license_name in INVALID_STOCK_LICENSES:
      logging.warning('%s: cannot use stock license %s, skipping...',
                      info.fullnamerev, license_name)
    else:
      info.license_names.append(license_name)

  return info
def EvaluateTemplate(template, env, escape=True):
  """Expand a template with variables like {{foo}} using a
  dictionary of expansions.

  Args:
    template: text containing {{key}} placeholders.
    env: dict mapping each key to its replacement string.
    escape: when true, HTML-escape '&', '<' and '>' in every replacement
      value before inserting it (quotes are left alone).

  Returns:
    The template with every occurrence of each {{key}} replaced.
  """
  if escape:
    # cgi.escape is gone from recent Python versions; html.escape with
    # quote=False escapes exactly the same three characters.
    try:
      from html import escape as escape_html
    except ImportError:
      from cgi import escape as escape_html

  # items() instead of iteritems() so this also runs where iteritems is gone.
  for key, val in env.items():
    if escape:
      val = escape_html(val, quote=False)
    template = template.replace('{{%s}}' % key, val)
  return template
def ProcessPkg(package):
  """Build the PackageInfo for |package| and attach license text to it.

  Args:
    package: full package name with version, as accepted by GetPackageInfo.

  Returns:
    None if the package is in a skip list; otherwise the PackageInfo, whose
    license_text is left unset when no usable license could be found (the
    package is then also recorded in OUTPUT_INCOMPLETE).
  """
  # First, we try to retrieve package and license data from the ebuild, and
  # return this in a created info object.
  # This will also set static license mappings stored in this script.
  info = GetPackageInfo(package)
  # None is returned if the package is in a skip list, not if the license is
  # invalid or missing.
  if not info:
    return None

  # From that info object, either get a mapped license file as per the
  # PACKAGE_LICENSES dict, or retrieve/unpack the source to look for license
  # files (scanning recursively by file name).
  # Note that finding a license file OVERRIDES any license specified in
  # the ebuild.
  if (not info.ExtractLicense() and
      # If no license looking file is found in source archive, use our own copy
      # of a stock license file matching what's defined in the ebuild (if any):
      not info.GetStockLicense()):
    # NOTE(review): the end of the sentence "add a static mapping to the ..."
    # and the argument of this call were lost in the copy this was restored
    # from; "PACKAGE_LICENSES dict." and info.fullnamerev are assumed.
    logging.error("""
%s: unable to find usable license.
Typically this will happen because the ebuild says it's MIT or BSD, but there
was no license file that this script could find to include along with a
copyright attribution (required for BSD/MIT).
Go investigate the unpacked source in /tmp/boardname/tmp/portage/..., and
find which license to assign. Once you found it, add a static mapping to the
PACKAGE_LICENSES dict.
If there was a usable license file, you may also want to teach this script to
find it if you have time.""",
                  info.fullnamerev)
    # NOTE(review): assumed bookkeeping; the script's final check reports the
    # contents of OUTPUT_INCOMPLETE as the packages that had errors.
    OUTPUT_INCOMPLETE.append(info.fullnamerev)

  return info
if __name__ == '__main__':
  # Usage: <script> [--debug] <board> <output.html>
  #
  # NOTE(review): this section was restored from a damaged copy in which
  # several statements were missing. Every reconstructed statement is marked
  # "assumed" below and should be checked against version control.
  debug = False
  if len(sys.argv) > 1 and sys.argv[1] == "--debug":
    debug = True
    # Assumed: drop the flag so that argv[1]/argv[2] are board and output file
    # and the argument count check below holds in debug mode too.
    sys.argv.pop(1)
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
  else:
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

  if len(sys.argv) != 3:
    sys.stderr.write('%s\n' % __doc__)
    sys.exit(1)  # assumed

  with open(ENTRY_TEMPLATE_FILE, 'rb') as f:
    entry_template = f.read()

  # We have a hardcoded list of skipped packages for various reasons, but we
  # also exclude any google platform package from needing a license since they
  # are covered by the top license in the tree.
  cmd = "cros_workon info --all --host | grep src/platform/ |"\
      "awk '{print $1}'"
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
  packages = p.communicate()[0].splitlines()
  ret = p.wait()
  if ret != 0:
    raise AssertionError('%s exit code was not 0: got %s' % (cmd, ret))
  # Assumed: `packages` is overwritten just below, so the platform packages
  # must be folded into the skip list here.
  SKIPPED_PACKAGES += packages

  # FIXME(merlin): we should have proper command line parsing and allow passing
  # a single package to generate a license for, as an argument.
  board = sys.argv[1]
  packages = ListInstalledPackages(board)
  # If the caller forgets to set $board, it'll default to beaglebone, and return
  # no packages. Catch this and give a hint that the wrong board was given.
  if not packages:
    raise AssertionError('FATAL: Could not get any packages for board %s' %
                         board)

  # For temporary single package debugging (make sure to include trailing -ver):
  #packages = [ "dev-python/unittest2-0.5.1" ]
  logging.debug("Package list to work through:")
  logging.debug('\n'.join(packages))  # assumed
  logging.debug("Will skip these packages:")
  logging.debug('\n'.join(SKIPPED_PACKAGES))  # assumed

  # ProcessPkg returns None for skipped packages; keep only real entries.
  infos = [info for info in (ProcessPkg(x) for x in packages) if info]
  infos += BuildMetaPackages()
  infos.sort(key=lambda x: (x.name, x.version, x.revision))

  entries = []
  seen_package_names = set()
  for info in infos:
    # Emit one entry per package name, whichever version sorts first.
    if info.name in seen_package_names:
      continue  # assumed
    seen_package_names.add(info.name)  # assumed
    env = {
        'name': info.name,  # assumed key; must match the entry template
        'url': info.homepages[0] if info.homepages else '',
        'license': info.license_text or '',
    }
    entries.append(EvaluateTemplate(entry_template, env))

  with open(TEMPLATE_FILE, 'rb') as f:
    file_template = f.read()
  with open(sys.argv[2], "w") as out_file:
    # Assumed: the entries are already escaped HTML, so they are inserted
    # without a second round of escaping.
    out_file.write(EvaluateTemplate(file_template,
                                    { 'entries': '\n'.join(entries) },
                                    escape=False))

  if OUTPUT_INCOMPLETE:  # assumed guard
    raise AssertionError("""
Some packages are missing due to errors, please look at errors generated during
this run.
List of packages with errors:
%s
""" % '\n'.join(OUTPUT_INCOMPLETE))