| #!/usr/bin/python |
| # |
| # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| # |
| # FIXME(merlin): remove this after fixing the current code. |
| # pylint: disable-msg=W0621 |
| |
| """Script that attempts to generate an HTML file containing license |
| information and homepage links for all installed packages. |
| |
| WARNING: this script in its current form is not finished or considered |
| production quality/code style compliant. This is an intermediate checkin |
| to allow for incremental cleanups and improvements that will make it |
| production quality. |
| |
| Usage: |
| For this script to work, you must have built the architecture |
| this is being run against, _after_ you've last run repo sync. |
| Otherwise, it will query newer source code and then fail to work on packages |
| that are out of date in your build. |
| |
| Recommended build: |
| cros_sdk |
| export board=x86-alex |
| sudo rm -rf /build/$board |
| cd ~/trunk/src/scripts |
| ./setup_board --board=$board |
| ./build_packages --board=$board --nowithautotest --nowithtest --nowithdev |
| cd ~/trunk/chromite/scripts/license-generation |
| ./licenses.py [--debug] $board out.html 2>&1 | tee output.sav |
| |
| For debugging during development, you can get a faster run of just one package |
| with: |
| ./licenses.py --testpkg "dev-libs/libatomic_ops-7.2d" $board out.html |
| |
| The output file is meant to update |
| http://src.chromium.org/viewvc/chrome/trunk/src/chrome/browser/resources/ + |
| chromeos/about_os_credits.html?view=log |
| (gclient config svn://svn.chromium.org/chrome/trunk/src) |
| For an example CL, see https://codereview.chromium.org/13496002/ |
| |
| It is recommended that you use a fancy differ like 'meld' to review license |
| diffs. GNU diff will show too much irrelevant noise and not resync properly. |
| |
| UPDATE: gcl will probably fail now, because the file is too big. Before it |
| gets moved somewhere else, you should just use svn diff and svn commit. |
| |
| Recommended way to diff the html, go outside of the cros_sdk chroot: |
| grep -E -A5 '(class="title|class="homepage|Provided Stock|Source license)' \ |
| out.html > /tmp/new |
| grep -E -A5 '(class="title|class="homepage|Provided Stock|Source license)' \ |
| out-sav-new3.html > /tmp/old |
| meld /tmp/old /tmp/new (or your favourite fancy diff program) |
| |
| If you don't get this in before the freeze window, it'll need to be merged into |
| the branch being released, which is done by adding a Merge-Requested label to |
| Iteration-xx in the tracking bug. |
| Once it's been updated to "Merge-Approved" by a TPM, please merge into the |
| required release branch. You can ask karen@ for help with merge approval. |
| Example: http://crbug.com/221281 |
| """ |
| |
| import cgi |
| import codecs |
| import getopt |
| import logging |
| import os |
| import portage |
| import subprocess |
| import sys |
| |
# Path template for an equery wrapper; '%s' is presumably the board name.
# NOTE(review): not referenced in the visible part of this file -- confirm it
# is still used.
EQUERY_BASE = '/usr/local/bin/equery-%s'

# Directories that hold stock license files, one file per license name.
# They are searched in the order listed: the two portage overlays under
# ~/trunk first, then the 'licenses' directory next to this script.
STOCK_LICENSE_DIRS = [
  os.path.expanduser('~/trunk/src/third_party/portage/licenses'),
  os.path.expanduser('~/trunk/src/third_party/portage-stable/licenses'),
  os.path.abspath(os.path.join(os.path.dirname(__file__), 'licenses')),
]
| |
# Package categories that are excluded from license processing altogether.
# Virtual packages don't need to have a license and often don't, so we skip
# them. chromeos-base contains Google platform packages that are covered by
# the general license at top of tree, so we skip those too.
SKIPPED_CATEGORIES = [
  'chromeos-base', # TODO: this shouldn't be excluded ?
  'virtual',
]
| |
# Individual packages ('category/name', no version) that are excluded from
# license processing.
SKIPPED_PACKAGES = [
  # Fix these packages by adding a real license in the code.
  # You should not skip packages just because the license scraping doesn't
  # work. Stick those special cases into PACKAGE_LICENSES.
  # Packages should only be here because they are sub/split packages already
  # covered by the license of the main package.

  # These are Chrome-OS-specific packages, copyright BSD-Google
  'sys-kernel/chromeos-kernel', # already manually credit Linux

  # These have been split across several packages, so we skip listing the
  # individual components (and just list the main package instead).
  'app-editors/vim-core',
  'x11-apps/mesa-progs',

  # Portage metapackage.
  'x11-base/xorg-drivers',

  # These are covered by app-i18n/ibus-mozc (BSD, copyright Google).
  'app-i18n/ibus-mozc-chewing',
  'app-i18n/ibus-mozc-hangul',
  'app-i18n/ibus-mozc-pinyin',

  # These have License: Proprietary in the ebuild.
  'app-i18n/GoogleChineseInput-cangjie',
  'app-i18n/GoogleChineseInput-pinyin',
  'app-i18n/GoogleChineseInput-wubi',
  'app-i18n/GoogleChineseInput-zhuyin',
  'app-i18n/GoogleKoreanInput',

  # These are all X.org sub-packages; shouldn't be any need to list them
  # individually.
  'media-fonts/encodings',
  'x11-apps/iceauth',
  'x11-apps/intel-gpu-tools',
  'x11-apps/mkfontdir',
  'x11-apps/rgb',
  'x11-apps/setxkbmap',
  'x11-apps/xauth',
  'x11-apps/xcursorgen',
  'x11-apps/xdpyinfo',
  'x11-apps/xdriinfo',
  'x11-apps/xev',
  'x11-apps/xgamma',
  'x11-apps/xhost',
  'x11-apps/xinit',
  'x11-apps/xinput',
  'x11-apps/xkbcomp',
  'x11-apps/xlsatoms',
  'x11-apps/xlsclients',
  'x11-apps/xmodmap',
  'x11-apps/xprop',
  'x11-apps/xrandr',
  'x11-apps/xrdb',
  'x11-apps/xset',
  'x11-apps/xset-mini',
  'x11-apps/xwininfo',
  'x11-base/xorg-server',
  'x11-drivers/xf86-input-evdev',
  'x11-drivers/xf86-input-keyboard',
  'x11-drivers/xf86-input-mouse',
  'x11-drivers/xf86-input-synaptics',
  'x11-drivers/xf86-video-intel',
  'x11-drivers/xf86-video-vesa',
  'x11-drivers/xf86-video-vmware',
  'x11-libs/libICE',
  'x11-libs/libSM',
  'x11-libs/libX11',
  'x11-libs/libXScrnSaver',
  'x11-libs/libXau',
  'x11-libs/libXcomposite',
  'x11-libs/libXcursor',
  'x11-libs/libXdamage',
  'x11-libs/libXdmcp',
  'x11-libs/libXext',
  'x11-libs/libXfixes',
  'x11-libs/libXfont',
  'x11-libs/libXfontcache',
  'x11-libs/libXft',
  'x11-libs/libXi',
  'x11-libs/libXinerama',
  'x11-libs/libXmu',
  'x11-libs/libXp',
  'x11-libs/libXrandr',
  'x11-libs/libXrender',
  'x11-libs/libXres',
  'x11-libs/libXt',
  'x11-libs/libXtst',
  'x11-libs/libXv',
  'x11-libs/libXvMC',
  'x11-libs/libXxf86vm',
  'x11-libs/libdrm',
  'x11-libs/libfontenc',
  'x11-libs/libpciaccess',
  'x11-libs/libxkbfile',
  'x11-libs/libxkbui',
  'x11-libs/pixman',
  'x11-libs/xtrans',
  'x11-misc/util-macros',
  'x11-misc/xbitmaps',
  'x11-proto/bigreqsproto',
  'x11-proto/compositeproto',
  'x11-proto/damageproto',
  'x11-proto/dri2proto',
  'x11-proto/fixesproto',
  'x11-proto/fontcacheproto',
  'x11-proto/fontsproto',
  'x11-proto/inputproto',
  'x11-proto/kbproto',
  'x11-proto/printproto',
  'x11-proto/randrproto',
  'x11-proto/recordproto',
  'x11-proto/renderproto',
  'x11-proto/resourceproto',
  'x11-proto/scrnsaverproto',
  'x11-proto/trapproto',
  'x11-proto/videoproto',
  'x11-proto/xcmiscproto',
  'x11-proto/xextproto',
  'x11-proto/xf86bigfontproto',
  'x11-proto/xf86dgaproto',
  'x11-proto/xf86driproto',
  'x11-proto/xf86rushproto',
  'x11-proto/xf86vidmodeproto',
  'x11-proto/xineramaproto',
  'x11-proto/xproto',
]
| |
# Base names of files in unpacked package source that are treated as license
# files.
# TODO(merlin): replace matching with regex matching to simplify this
# Matching is done in lowercase, you MUST give lowercase names.
LICENSE_FILENAMES = [
  'copying',
  'copyright',
  'licence', # used by openssh
  'license',
  'license.txt', # used by hdparm, used by NumPy, glew
  'licensing.txt', # used by libatomic_ops
  'ipa_font_license_agreement_v1.0.txt', # used by ja-ipafonts
]
| |
# A candidate license file is ignored when any of these strings occurs
# anywhere in its path relative to the package work directory (plain
# substring match).
SKIPPED_LICENSE_FILENAME_COMPONENTS = [
  # FIXME: check whether this should be excluded.
  'third_party'
]
| |
# Maps 'category/name' to the list of license names to use instead of the
# ones read from the ebuild.
#
# These are _temporary_ license mappings for packages that do not have a valid
# stock license, or LICENSE file we can use.
# Once this script runs earlier (during the package build process), it will
# block new source without a LICENSE file if the ebuild contains a license
# that requires copyright assignment (BSD and friends).
# At that point, new packages will get fixed to include LICENSE instead of
# adding workaround mappings like those below.
# We should also fix the packages listed below so that the hardcoded
# mappings can be obsoleted (i.e. FIXME for this entire list).
PACKAGE_LICENSES = {
  # One off licenses. Should we check in a custom LICENSE file in upstream?
  'dev-python/netifaces': ['netiface'],
  'net-dialup/ppp': ['ppp-2.4.4'],
  'sys-libs/ncurses': ['ncurses'],

  # BSD and MIT license authorship mapping.
  # Ideally we should have a custom LICENSE file in the upstream source.
  # TODO: BSD-2: bsdiff is missing a license file, add one upstream.
  'dev-util/bsdiff': ['BSD-bsdiff'],
  # TODO: libevent is missing a license file, add one upstream.
  'dev-libs/libevent': ['BSD-libevent'],
  # TODO: dhcpcd is missing a license file, (c) in README. Add one upstream.
  'net-misc/dhcpcd': ['BSD-dhcpcd'],
  # TODO: iputils is missing a license file, add one upstream.
  'net-misc/iputils': ['BSD-iputils'],
  # TODO: c-ares is missing a license file, add one upstream.
  'net-dns/c-ares': ['MIT-MIT'],

  # TODO: We should just check in a LICENSE file in all of these:
  'app-i18n/input-tools': ['BSD-Google'],
  'app-i18n/nacl-mozc': ['BSD-Google'],
  'app-i18n/ibus-mozc': ['BSD-Google'],
  'media-plugins/o3d': ['BSD-Google'],
  'dev-python/unittest2': ['BSD-Google'],

  # Fix ebuild multi license definitions when they define licenses that do
  # not apply to us because we don't use the resulting binaries.

  # Mesa ebuild says MIT and seems to omit LGPL-3 and SGI-B-2.0 mentioned in the
  # docs directory? Either way, I had to create a text license file like so:
  # mesa-9.1-r9/work/Mesa-9.1/docs$ lynx --dump license.html -nolist > license
  'media-libs/mesa': [ 'MIT-Mesa', 'LGPL-3','SGI-B-2.0' ],

  # TODO: Ebuild seems to wrongfully say BSD + public-domain.
  # I scanned the unpacked source with licensecheck and didn't find any BSD.
  # FIXME: Do a second review and fix upstream gentoo package
  'sys-libs/timezone-data': [ 'public-domain' ],

  # Ebuild only says 'LGPL-2.1', but source disagrees. I'll include 'as-is'
  # to force reading files from the source (which states some parts are as-is).
  # FIXME? Should the ebuild license be updated to match xz-4.999.9beta/COPYING?
  'app-arch/xz-utils': [ 'public-domain', 'as-is', 'LGPL-2.1', 'GPL-2' ],

  # These packages are not in Alex, check and remove later (might be used in
  # other platforms).
  #'media-libs/freeimage': ['GPL-2'],
  #'sys-libs/talloc': ['LGPL-3'], # ebuild incorrectly says GPL-3
  #'app-crypt/nss': ['MPL-1.1'],
  #'media-libs/jpeg': ['jpeg'],
  #'app-editors/gentoo-editor': ['MIT-gentoo-editor'],
  #
  # 'media-fonts/font-util': ['font-util'], # COPYING file from git repo
  # 'net-wireless/iwl1000-ucode': ['Intel-iwl1000'],
  # 'sys-process/vixie-cron': ['vixie-cron'],
}
| |
# Any license listed here that is found in the ebuild will make the code look
# for license files inside the package source code in order to get copyright
# attribution from them.
COPYRIGHT_ATTRIBUTION_LICENSES = [
  'BSD', # requires distribution of copyright notice
  'BSD-2', # so does BSD-2 http://opensource.org/licenses/BSD-2-Clause
  'BSD-3', # and BSD-3? http://opensource.org/licenses/BSD-3-Clause
  'BSD-4', # and 4?
  'BSD-with-attribution',
  'Old-MIT',
  'MIT',
  'MIT-with-advertising',
]
| |
# The following licenses are valid and can be shown in stock form, but the
# stock text is not very helpful. It is better to look in the source code for
# a more specific license if there is one; it is not an error if no better
# one is found.
# Note that you don't want to set just anything here since any license here
# will be included once in stock form and a second time in custom form if
# found (there is no good way to know that a license we found on disk is the
# better version of the stock version, so we show both).
LOOK_IN_SOURCE_LICENSES = [
  'as-is', # The stock license is very vague, source always has more details.
  'PSF-2', # The custom license in python is more complete than the template.

  # As far as I know, we have no requirement to do copyright attribution for
  # these licenses, but it's simple and reliable to do it, so go for it.
  'BZIP2', # Single use license, do copyright attribution because it's easy.
  'OFL', # Almost single use license, do copyright attribution.
  'OFL-1.1', # Almost single use license, do copyright attribution.
  'UoI-NCSA', # Only used by NCSA, might as well show their custom copyright.
]
| |
# Maps 'category/name' to a list of homepage URLs that replaces the HOMEPAGE
# value read from the ebuild for that package.
PACKAGE_HOMEPAGES = {
  'app-editors/vim': ['http://www.vim.org/'],
  'x11-proto/glproto': ['http://www.x.org/'],
}
| |
# These are tokens found in LICENSE= in an ebuild that aren't licenses we
# can actually read from disk. They are filtered out before license names
# are processed.
# You should not use this to blacklist real licenses.
LICENCES_IGNORE = [
  ')', # Ignore OR tokens from LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
  '(',
  '||',
  'International', # Workaround for LICENSE="Marvell International Ltd."
  'Ltd.', # Find Marvell and ignore the other 2 tokens (FIXME upstream)
]
| |
# Default file names of the two HTML templates: the whole-page template and
# the per-package entry template. Both are relative paths, so they are
# resolved against the current working directory when opened.
TEMPLATE_FILE = 'about_credits.tmpl'
ENTRY_TEMPLATE_FILE = 'about_credits_entry.tmpl'
| |
class PackageLicenseError(Exception):
  """Raised when license information for a package cannot be obtained.

  Any failure while gathering a package's license data is reported with this
  exception, and it makes the overall processing end in an error.
  """
| |
class PackageSkipped(Exception):
  """Signals that a package is deliberately left out of license processing.

  This is not an error condition.
  """
| |
| class PackageInfo: |
| def __init__(self): |
| |
| self.revision = None |
| |
| # Array of scanned license texts. |
| self.license_text_scanned = [] |
| |
| self.category = None |
| self.name = None |
| self.version = None |
| |
| # Array of license names retrieved from ebuild or override in this code. |
| self.ebuild_license_names = [] |
| self.description = None |
| self.homepages = [] |
| # This contains stock licenses names we can read from Gentoo. |
| self.stock_license_names = [] |
| |
| # We set this if the ebuild has a BSD/MIT like license that requires |
| # scanning for a LICENSE file in the source code, or a static mapping |
| # in PACKAGE_LICENSES. Not finding one once this is set, is fatal. |
| self.need_copyright_attribution = False |
| # This flag just says we'd like to include licenses from the source, but |
| # not finding any is not fatal. |
| self.scan_source_for_licenses = False |
| |
| @property |
| def fullnamerev(self): |
| s = '%s-%s' % (self.fullname, self.version) |
| if self.revision: |
| s += '-r%s' % self.revision |
| return s |
| |
| @property |
| def fullname(self): |
| return '%s/%s' % (self.category, self.name) |
| |
| def _RunEbuildPhases(self, path, *phases, **kwargs): |
| """Receives something like: |
| path = /mnt/host/source/src/ |
| third_party/portage-stable/net-misc/rsync/rsync-3.0.8.ebuild |
| phases = ['clean', 'fetch'] or ['unpack'].""" |
| |
| #logging.debug('ebuild-%s | %s | %s', board, path, str(list(phases))) |
| subprocess.check_call( |
| ['ebuild-%s' % board, path] + list(phases), **kwargs) |
| |
| def _ExtractLicenses(self): |
| """Try to get licenses from the package by unpacking it with ebuild |
| and looking for license files in the unpacked tree. |
| This is only called if we couldn't get usable licenses from the ebuild, |
| or one of them is BSD/MIT like which forces us to look for a file with |
| copyright attribution in the source code itself. |
| It'll scan the unpacked source code for what looks like license files |
| as defined in LICENSE_FILENAMES. |
| """ |
| |
| path = self._GetEbuildPath(board, self.fullnamerev) |
| self._RunEbuildPhases( |
| path, 'clean', 'fetch', |
| stdout=open('/dev/null', 'wb'), |
| stderr=subprocess.STDOUT) |
| self._RunEbuildPhases(path, 'unpack') |
| |
| p = subprocess.Popen(['portageq-%s' % board, 'envvar', |
| 'PORTAGE_TMPDIR'], stdout=subprocess.PIPE) |
| tmpdir = p.communicate()[0].strip() |
| ret = p.wait() |
| if ret != 0: |
| raise AssertionError('exit code was not 0: got %s' % ret) |
| |
| # tmpdir gets something like /build/daisy/tmp/ |
| workdir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work') |
| |
| # You may wonder how deep should we go? |
| # In case of packages with sub-packages, it could be deep. |
| # Let's just be safe and get everything we can find. |
| # In the case of libatomic_ops, it's actually required to look deep |
| # to find the MIT license: |
| # dev-libs/libatomic_ops-7.2d/work/gc-7.2/libatomic_ops/doc/LICENSING.txt |
| args = ['find', workdir, '-type', 'f'] |
| p = subprocess.Popen(args, stdout=subprocess.PIPE) |
| files = p.communicate()[0].splitlines() |
| ret = p.wait() |
| if ret != 0: |
| raise AssertionError('exit code was not 0: got %s' % ret) |
| |
| files = [x[len(workdir):].lstrip('/') for x in files] |
| license_files = [] |
| for name in files: |
| if os.path.basename(name).lower() in LICENSE_FILENAMES: |
| has_skipped_component = False |
| # FIXME: Should we really exclude third_party? |
| # (someone coded it that way with no comments as to why). |
| for comp in SKIPPED_LICENSE_FILENAME_COMPONENTS: |
| if comp in name: |
| has_skipped_component = True |
| break |
| if not has_skipped_component: |
| license_files.append(name) |
| |
| if not license_files: |
| if self.need_copyright_attribution: |
| logging.error("%s used license with copyright attribution, but " |
| "couldn't find license file in %s", |
| self.fullnamerev, workdir) |
| raise PackageLicenseError() |
| else: |
| # We can get called for a license like as-is where it's preferable |
| # to find a better one in the source, but not fatal if we didn't. |
| logging.info("Was not able to find a better license for %s " |
| "in %s to replace the more generic one from ebuild", |
| self.fullnamerev, workdir) |
| |
| # Examples of multiple license matches: |
| # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE |
| # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING |
| # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING |
| # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING |
| # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE |
| # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING |
| # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING |
| # strongswan-5.0.2/LICENSE |
| # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING |
| logging.info('License(s) for %s: %s', self.fullnamerev, |
| ' '.join(license_files)) |
| for license_file in sorted(license_files): |
| # Joy and pink ponies. Some license_files are encoded as latin1 while |
| # others are utf-8 and of course you can't know but only guess. |
| license_path = os.path.join(workdir, license_file) |
| try: |
| license_txt = codecs.open(license_path, encoding="utf-8").read() |
| logging.info("Adding license %s: (guessed UTF-8)", license_path) |
| except UnicodeDecodeError: |
| license_txt = codecs.open(license_path, encoding="latin1").read() |
| logging.info("Adding license %s: (guessed latin1)", license_path) |
| |
| self.license_text_scanned += [ |
| "Scanned Source license %s:\n\n%s" % (license_file, license_txt) ] |
| |
| # We used to clean up here, but there have been many instances where |
| # looking at unpacked source to see where the licenses were, was useful |
| # so let's disable this for now |
| #self._RunEbuildPhases(path, 'clean') |
| |
| def _GetEbuildPath(self, board, name): |
| """Turns (x86-alex, net-misc/wget-1.12) into |
| /mnt/host/source/src/third_party/portage-stable/net-misc/wget/wget-1.12.ebui |
| """ |
| p = subprocess.Popen( |
| ['equery-%s' % board, 'which', name], stdout=subprocess.PIPE) |
| stdout = p.communicate()[0] |
| p.wait() |
| path = stdout.strip() |
| logging.debug("equery-%s which %s", board, name) |
| logging.debug(" -> %s", path) |
| if not path: |
| raise AssertionError('GetEbuildPath for %s failed.\n' |
| 'Is your tree clean? Delete /build/%s and rebuild' % |
| (name, board)) |
| return path |
| |
| def _GetPackageInfo(self, fullnamewithrev): |
| """Create a PackageInfo object and populate its license, homepage and |
| description if they are valid. |
| Some packages have static license mappings applied to them. |
| |
| self.ebuild_license_names will not be filled the package is skipped |
| or if there was an issue getting data from the ebuild. |
| self.stock_license_names will only get the licenses that we can paste |
| as stock licenses and scan_source_for_licenses will be set if we should |
| unpack the source to look for licenses (need_copyright_attribution will |
| make not finding one fatal later). |
| """ |
| |
| try: |
| self.category, self.name, self.version, self.revision = \ |
| portage.versions.catpkgsplit(fullnamewithrev) |
| except TypeError: |
| raise AssertionError("portage couldn't find %s, missing version number?" % |
| fullnamewithrev) |
| |
| if self.revision is not None: |
| self.revision = str(self.revision).lstrip('r') |
| if self.revision == '0': |
| self.revision = None |
| |
| if self.category in SKIPPED_CATEGORIES: |
| raise PackageSkipped("%s in SKIPPED_CATEGORIES, skip package" % |
| self.fullname) |
| |
| if self.fullname in SKIPPED_PACKAGES: |
| raise PackageSkipped("%s in SKIPPED_PACKAGES, skip package" % |
| self.fullname) |
| |
| ebuild = self._GetEbuildPath(board, self.fullnamerev) |
| |
| if not os.access(ebuild, os.F_OK): |
| logging.error("Can't access %s", ebuild) |
| raise PackageLicenseError() |
| |
| cmd = [ |
| 'portageq', |
| 'metadata', |
| '/build/%s' % board, |
| 'ebuild', |
| self.fullnamerev, |
| 'HOMEPAGE', 'LICENSE', 'DESCRIPTION', |
| ] |
| p = subprocess.Popen(cmd, stdout=subprocess.PIPE) |
| lines = [s.strip() for s in p.stdout.readlines()] |
| p.wait() |
| |
| if p.returncode != 0: |
| raise AssertionError("%s failed" % cmd) |
| |
| # Runs: |
| # portageq metadata /build/x86-alex ebuild net-misc/wget-1.12-r2 \ |
| # HOMEPAGE LICENSE DESCRIPTION |
| # Returns: |
| # http://www.gnu.org/software/wget/ |
| # GPL-3 |
| # Network utility to retrieve files from the WWW |
| |
| (self.homepages, self.ebuild_license_names, self.description) = ( |
| lines[0].split(), lines[1].split(), lines[2:]) |
| |
| if self.fullname in PACKAGE_HOMEPAGES: |
| self.homepages = PACKAGE_HOMEPAGES[self.fullname] |
| |
| # Packages with missing licenses or licenses that need mapping (like |
| # BSD/MIT) are hardcoded here: |
| if self.fullname in PACKAGE_LICENSES: |
| self.ebuild_license_names = PACKAGE_LICENSES[self.fullname] |
| logging.info("Static license mapping for %s: %s", self.fullnamerev, |
| ",".join(self.ebuild_license_names)) |
| else: |
| logging.info("Read licenses from ebuild for %s: %s", self.fullnamerev, |
| ",".join(self.ebuild_license_names)) |
| |
| |
| def GetLicenses(self, fullnamewithrev): |
| """After populating the package info and stock licenses, this figures |
| out whether the package source should be scanned to add licenses found |
| there. |
| |
| Raises: PackageSkipped, PackageLicenseErrorCall (via _GetPackageInfo). |
| """ |
| |
| # First populate the package basic information |
| self._GetPackageInfo(fullnamewithrev) |
| |
| # The ebuild license field can look like: |
| # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3) |
| # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild |
| # LICENSE="|| ( LGPL-2.1 MPL-1.1 )" |
| # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild |
| # LICENSE="Marvell International Ltd." <- invalid syntax to fix upstream |
| # for net-wireless/marvell_sd8787/marvell_sd8787-14.64.2.47-r16.ebuild |
| |
| # The parser isn't very smart and only has basic support for the |
| # || ( X Y ) OR logic to do the following: |
| # In order to save time needlessly unpacking packages and looking or a |
| # cleartext license (which is really a crapshoot), if we have a license |
| # like BSD that requires looking for copyright attribution, but we can |
| # chose another license like GPL, we do that. |
| |
| if not self.ebuild_license_names: |
| logging.error("%s: no license found in ebuild. FIXME!", self.fullnamerev) |
| # In a bind, you could comment this out. I'm making the output fail to |
| # get your attention since this error really should be fixed, but if you |
| # comment out the next line, the script will try to find a license inside |
| # the source. |
| raise PackageLicenseError() |
| |
| self.ebuild_license_names = self.ebuild_license_names |
| |
| # This is not invalid, but the parser can't deal with it, so if it ever |
| # happens, error out to tell the programmer to do something. |
| if "||" in self.ebuild_license_names[1:]: |
| raise AssertionError("%s: Can't parse || in the middle of a license: %s" |
| % (self.fullnamerev, |
| ' '.join(self.ebuild_license_names))) |
| |
| self.stock_license_names = [] |
| or_licenses_and_one_is_no_attribution = False |
| # We do a quick early pass first so that the longer pass below can |
| # run accordingly. |
| for license_name in [ x for x in self.ebuild_license_names |
| if x not in LICENCES_IGNORE ]: |
| # Here we have an OR case, and one license that we can use stock, so |
| # we remember that in order to be able to skip license attributions if |
| # any were in the OR. |
| if (self.ebuild_license_names[0] == "||" and |
| license_name not in COPYRIGHT_ATTRIBUTION_LICENSES): |
| or_licenses_and_one_is_no_attribution = True |
| |
| for license_name in [ x for x in self.ebuild_license_names |
| if x not in LICENCES_IGNORE ]: |
| # Licenses like BSD or MIT can't be used as is because they do not contain |
| # copyright self. They have to be replaced by copyright file given in the |
| # source code, or manually mapped by us in PACKAGE_LICENSES |
| if license_name in COPYRIGHT_ATTRIBUTION_LICENSES: |
| # To limit needless efforts, if a package is BSD or GPL, we ignore BSD |
| # and use GPL to avoid scanning the package, but we can only do this if |
| # or_licenses_and_one_is_no_attribution has been set above. |
| # This ensures that if we have License: || (BSD3 BSD4), we will |
| # look in the source. |
| if or_licenses_and_one_is_no_attribution: |
| logging.info("%s: ignore license %s because ebuild LICENSES had %s", |
| self.fullnamerev, license_name, |
| ' '.join(self.ebuild_license_names)) |
| else: |
| logging.info("%s: can't use %s, will scan source code for copyright", |
| self.fullnamerev, license_name) |
| self.need_copyright_attribution = True |
| self.scan_source_for_licenses = True |
| else: |
| self.stock_license_names.append(license_name) |
| # We can't display just 2+ because it only contains text that says to |
| # read v2 or v3. |
| if license_name == 'GPL-2+': |
| self.stock_license_names.append('GPL-2') |
| if license_name == 'LGPL-2+': |
| self.stock_license_names.append('LGPL-2') |
| |
| if license_name in LOOK_IN_SOURCE_LICENSES: |
| logging.info("%s: Got %s, will try to find better license in source...", |
| self.fullnamerev, license_name) |
| self.scan_source_for_licenses = True |
| if self.stock_license_names: |
| logging.info('%s: using stock license(s) %s', |
| self.fullnamerev, ','.join(self.stock_license_names)) |
| |
| # If the license(s) could not be found, or one requires copyright |
| # attribution, dig in the source code for license files: |
| # For instance: |
| # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2 |
| # We need get the substitution file for BSD and add it to GPL. |
| if not self.stock_license_names or self.scan_source_for_licenses: |
| self._ExtractLicenses() |
| |
| if not self.stock_license_names and not self.license_text_scanned: |
| logging.error(""" |
| %s: unable to find usable license. |
| Typically this will happen because the ebuild says it's MIT or BSD, but there |
| was no license file that this script could find to include along with a |
| copyright attribution (required for BSD/MIT). |
| Go investigate the unpacked source in /tmp/boardname/tmp/portage/..., and |
| find which license to assign. Once you found it, add a static mapping to the |
| PACKAGE_LICENSES dict if that license is not in a file, or teach this script |
| to find the license file.""", |
| self.fullname) |
| raise PackageLicenseError() |
| |
| |
| class Licensing: |
| def __init__(self, package_fullnames, |
| entry_template_file=ENTRY_TEMPLATE_FILE): |
| |
| # List of stock licenses referenced in ebuilds. Used to print a report. |
| self.stock_licenses = {} |
| |
| # This keeps track of whether we have an incomplete license file due to |
| # package errors during parsing. |
| # Any non empty list at the end shows the list of packages that caused |
| # errors. |
| self.incomplete_packages = [] |
| |
| self.package_text = {} |
| self.entry_template = codecs.open(entry_template_file, mode='rb', |
| encoding="utf-8").read() |
| self.packages = [ ] |
| self._package_fullnames = package_fullnames |
| |
| @property |
| def sorted_stock_licenses(self): |
| return sorted(self.stock_licenses.keys()) |
| |
| def LicensedPackages(self, license_name): |
| '''Return list of packages using a given license.''' |
| return self.stock_licenses[license_name] |
| |
| def ProcessPackages(self): |
| """Do not call this after adding virtual packages with AddExtraPkg.""" |
| for package in self._package_fullnames: |
| pkg = PackageInfo() |
| try: |
| pkg.GetLicenses(package) |
| # Keep track of which stock licenses are used by which packages. |
| for stock_license in pkg.stock_license_names: |
| try: |
| self.stock_licenses[stock_license] += [ pkg.fullnamerev ] |
| except KeyError: |
| self.stock_licenses[stock_license] = [ pkg.fullnamerev ] |
| self.packages += [ pkg ] |
| except PackageSkipped, e: |
| logging.info(e) |
| except PackageLicenseError: |
| self.incomplete_packages += [ pkg.fullnamerev ] |
| |
| def AddExtraPkg(self, pkg_data): |
| """Allow adding pre-created virtual packages. GetLicenses will not work |
| on them, so add them after having run ProcessPackages.""" |
| pkg = PackageInfo() |
| pkg.category = pkg_data[0] |
| pkg.name = pkg_data[1] |
| pkg.version = pkg_data[2] |
| pkg.homepages = pkg_data[3] # this is a list |
| pkg.stock_license_names = pkg_data[4] # this is also a list |
| pkg.ebuild_license_names = pkg_data[4] |
| self.packages += [ pkg ] |
| |
def _ReadStockLicense(self, stock_license_name, package):
  """Read and return the stock license file specified in an ebuild.

  The license is looked up by file name in each directory of
  STOCK_LICENSE_DIRS, in order; the first existing file is used.

  Args:
    stock_license_name: file name of the stock license to read.
    package: package the license is read for; its fullnamerev is used in log
      messages and recorded when the license cannot be found.

  Returns:
    The decoded license text, or "" when the license file is not found. In
    the latter case package.fullnamerev is appended to
    self.incomplete_packages.
  """
  license_path = None
  for directory in STOCK_LICENSE_DIRS:
    path = '%s/%s' % (directory, stock_license_name)
    if os.access(path, os.F_OK):
      license_path = path
      break

  if not license_path:
    logging.error('%s: stock license %s could not be found in %s',
                  package.fullnamerev, stock_license_name,
                  '\n'.join(STOCK_LICENSE_DIRS))
    self.incomplete_packages += [package.fullnamerev]
    return ""

  # Read the raw bytes once; the with block closes the handle right away
  # instead of leaving up to two open files for the garbage collector.
  with open(license_path, 'rb') as license_file:
    raw_license = license_file.read()

  # Joy and pink ponies. Some stock licenses are encoded as latin1 while
  # others are utf-8 and of course you can't know but only guess.
  try:
    license_txt = raw_license.decode('utf-8')
    encoding_label = 'UTF-8'
  except UnicodeDecodeError:
    license_txt = raw_license.decode('latin1')
    encoding_label = 'latin1'
  logging.info('%s: read license %s (%s)', package.fullnamerev,
               license_path, encoding_label)
  return license_txt
| |
def _GeneratePackageLicenseText(self, package):
  """Build the license entry for one package and cache it.

  Concatenates all license texts scanned from the package source tree, if
  any, followed by every stock gentoo license the package names. The result
  is expanded through self.entry_template and stored in
  self.package_text[package].

  Args:
    package: package to generate the entry for.

  Raises:
    AssertionError: the package has neither scanned nor stock licenses.
  """
  separator = ("-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
               "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n")

  chunks = []
  for scanned_text in package.license_text_scanned:
    chunks.extend([scanned_text, separator])

  for stock_name in package.stock_license_names:
    chunks.extend([
        "Gentoo Package Provided Stock License %s:" % stock_name,
        self._ReadStockLicense(stock_name, package),
        separator,
    ])

  # This should get caught earlier, but one extra check.
  if not chunks:
    raise AssertionError('Ended up with no license_text')

  if package.homepages:
    homepage = package.homepages[0]
  else:
    homepage = ''

  joined_text = '\n'.join(chunks)
  if not joined_text:
    joined_text = 'ERROR FINDING LICENSE(S)'

  env = {
      'name': "%s-%s" % (package.name, package.version),
      'url': homepage,
      'license': joined_text,
  }
  self.package_text[package] = EvaluateTemplate(self.entry_template, env)
| |
| |
def GenerateHTMLLicenseOutput(self, output_file,
                              output_template_file=TEMPLATE_FILE):
  """Write the HTML license page covering every package in self.packages.

  Sorts self.packages in place by (name, version, revision), generates the
  entry of each package, and expands the page template with the entries
  joined by newlines.

  Args:
    output_file: path of the HTML file to write (UTF-8).
    output_template_file: path of the UTF-8 page template; its {{entries}}
      placeholder receives the package entries.
  """
  self.packages.sort(key=lambda x: (x.name, x.version, x.revision))
  sorted_license_txt = []
  for pkg in self.packages:
    self._GeneratePackageLicenseText(pkg)
    sorted_license_txt.append(self.package_text[pkg])

  # Context managers close both files deterministically; the output is
  # flushed on exit from the with block rather than at garbage collection.
  with codecs.open(output_template_file, mode='rb',
                   encoding="utf-8") as template_file:
    file_template = template_file.read()

  # Render before opening the output so that a failure while expanding the
  # template does not leave a truncated file behind.
  html = EvaluateTemplate(file_template,
                          {'entries': '\n'.join(sorted_license_txt)},
                          escape=False)
  with codecs.open(output_file, mode="w", encoding="utf-8") as out_file:
    out_file.write(html)
| |
| |
def ListInstalledPackages(board):
  """Return a list of all packages installed for a particular board.

  Runs the command built from EQUERY_BASE % board with the arguments
  "list *" and returns one entry per line of its standard output.

  Args:
    board: board name substituted into EQUERY_BASE.

  Returns:
    List of output lines with surrounding whitespace stripped.
  """
  # FIXME(merlin): davidjames pointed out that this is
  # not the right way to get the package list as it does not apply
  # filters. This should change to ~/trunk/src/scripts/get_package_list
  args = [EQUERY_BASE % board, 'list', '*']
  p = subprocess.Popen(args, stdout=subprocess.PIPE)
  # communicate() drains stdout, closes the pipe and waits for the child, so
  # neither an open descriptor nor an unreaped process is left behind.
  output = p.communicate()[0]
  if p.returncode != 0:
    # Keep going: report the failure but still return what was printed.
    logging.warning('%s exited with code %s', ' '.join(args), p.returncode)
  return [line.strip() for line in output.splitlines()]
| |
| |
def EvaluateTemplate(template, env, escape=True):
  """Expand a template with variables like {{foo}} using a
  dictionary of expansions.

  All placeholders are substituted in a single pass over the template, so
  text inserted for one variable is never rescanned for other placeholders
  and the result does not depend on the iteration order of env.

  Args:
    template: template text containing {{key}} placeholders.
    env: dictionary mapping each key to its replacement string.
    escape: when true, HTML-escape &, < and > in every replacement value
      (quotes are left untouched).

  Returns:
    The expanded template. Placeholders without a matching key in env are
    left as they are.
  """
  import re
  try:
    from html import escape as escape_html  # Python 3.2+
  except ImportError:
    from cgi import escape as escape_html  # Python 2

  if not env:
    return template

  expansions = dict(('{{%s}}' % key, val) for key, val in env.items())
  # Try longer placeholders first so that one placeholder which happens to be
  # a prefix of another cannot shadow it in the alternation.
  ordered = sorted(expansions, key=len, reverse=True)
  pattern = re.compile('|'.join(re.escape(item) for item in ordered))

  def _Expand(match):
    # A function replacement inserts the value verbatim: no backslash or
    # group-reference processing is applied to it.
    val = expansions[match.group(0)]
    if escape:
      val = escape_html(val, quote=False)
    return val

  return pattern.sub(_Expand, template)
| |
| |
def usage():
  """Write the module docstring to stderr and exit with status 1."""
  sys.stderr.write('%s\n' % (__doc__,))
  sys.exit(1)
| |
| |
if __name__ == '__main__':
  # Command line: [-h|--help] [-d|--debug] [-t|--testpkg PKG] BOARD OUTPUT
  debug = False
  testpkg = None
  try:
    opts, args = getopt.getopt(sys.argv[1:], "hdt:",
                               ["help", "debug", "testpkg="])
  except getopt.GetoptError:
    usage()
  for opt, arg in opts:
    if opt in ("-h", "--help"):
      usage()
    elif opt in ("-d", "--debug"):
      debug = True
    elif opt in ("-t", "--testpkg"):
      testpkg = arg

  # Exactly two positional arguments are required: the board and the output.
  if len(args) != 2:
    usage()
  board, output_file = args

  log_level = logging.DEBUG if debug else logging.INFO
  logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

  # We have a hardcoded list of skipped packages for various reasons, but we
  # also exclude any google platform package from needing a license since they
  # are covered by the top license in the tree.
  cmd = ("cros_workon info --all --host | grep src/platform/ |"
         "awk '{print $1}'")
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
  platform_packages = p.communicate()[0].splitlines()
  # communicate() has already waited for the child; pick up its exit status.
  ret = p.returncode
  if ret != 0:
    raise AssertionError('%s exit code was not 0: got %s' % (cmd, ret))
  SKIPPED_PACKAGES += platform_packages

  # For temporary single package debugging (make sure to include trailing
  # -ver):
  if testpkg:
    logging.info("Will only generate license for %s", testpkg)
    packages = [testpkg]
  else:
    packages = ListInstalledPackages(board)
  # If the caller forgets to set $board, it'll default to beaglebone, and
  # return no packages. Catch this and give a hint that the wrong board was
  # given.
  if not packages:
    raise AssertionError('FATAL: Could not get any packages for board %s' %
                         board)
  logging.debug("Package list to work through:")
  logging.debug('\n'.join(packages))
  logging.debug("Will skip these packages:")
  logging.debug('\n'.join(SKIPPED_PACKAGES))

  licensing = Licensing(packages)
  licensing.ProcessPackages()
  if not testpkg:
    # Virtual packages that are added to the output by hand; each entry is
    # [category, name, version, homepages, license names].
    extra_packages = [
        ['x11-base', 'X.Org', '1.9.3', ['http://www.x.org/'], ['X']],
        ['sys-kernel', 'Linux', '2.6', ['http://www.kernel.org/'], ['GPL-2']],
    ]
    for extra_pkg in extra_packages:
      licensing.AddExtraPkg(extra_pkg)
  licensing.GenerateHTMLLicenseOutput(output_file)

  # The output file has been written at this point, but it must not be used
  # if any package could not be processed.
  if licensing.incomplete_packages:
    failed_packages = '\n'.join(licensing.incomplete_packages)
    raise AssertionError("""
DO NOT USE OUTPUT!!!
Some packages are missing due to errors, please look at errors generated during
this run.
List of packages with errors:
%s
""" % failed_packages)

  logging.info("Non copyright attribution license usage:")
  for license_name in licensing.sorted_stock_licenses:
    users = ", ".join(licensing.LicensedPackages(license_name))
    logging.info("%s: %s", license_name, users)