# -*- coding: utf-8 -*-
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Compares the packages between 2 images by parsing the license file output."""
from __future__ import print_function
import re
from chromite.lib import commandline
def GetPackagesLicensesFromHtml(html_file):
    """Get the list of packages and licenses in a ChromeOS license file.

    The file is scanned line by line; every line is tested against the
    package regex and against each of the license regexes.

    Args:
        html_file: which html license file to scan for packages.

    Returns:
        A (packages, licenses) tuple. packages is a dict mapping each
        package name to its version string; licenses is a set of the
        license description strings found in the file.

    Raises:
        IOError/OSError: if html_file cannot be opened for reading.
    """
    packages = {}
    licenses = set()

    # Both groups are greedy, so name and version are split at the last '-'.
    pkg_rgx = re.compile(r'<span class="title">(.+)-(.+)</span>')
    # Do not add <pre> in the regex or it would only show the first entry on
    # a package that has multiple hits.
    license_rgx1 = re.compile(r'Scanned (Source License .+):', re.IGNORECASE)
    license_rgx2 = re.compile(r'(Custom License .+):', re.IGNORECASE)
    license_rgx3 = re.compile(r'(Copyright Attribution .+):', re.IGNORECASE)
    # This regex isn't as tight because it has to match these:
    # Gentoo Package Stock License BZIP2:
    # <a ... class="title">Gentoo Package Provided Stock License
    #   public-domain</a>
    # <a ... class="title">Gentoo Package Stock License public-domain</a>
    license_rgx4 = re.compile(r'(Stock License [^<:]+)', re.IGNORECASE)
    license_rgx5 = re.compile(r'class="title">(Custom License .+)</a>',
                              re.IGNORECASE)
    # Matches "<name>-<version>/<path>" so the version can be dropped.
    # Compiled once here instead of on every matching line.
    versioned_path_rgx = re.compile(r'(.+)-([^/]+)/(.+)')

    with open(html_file, 'r') as f:
        for line in f:
            # Grep and turn
            # <span class="title">ath6k-34</span>
            # into
            # ath6k 34
            match = pkg_rgx.search(line)
            if match:
                packages[match.group(1)] = match.group(2)

            match = license_rgx1.search(line)
            if match:
                # Turn Source license simplejson-2.5.0/LICENSE.txt
                # into Source license simplejson/LICENSE.txt
                # (we don't want to create diffs based on version numbers)
                lic = versioned_path_rgx.sub(r'\1/\3', match.group(1))
                # Old files had this lowercased.
                lic = re.sub(r'Source license', r'Source License', lic)
                licenses.add(lic)

            # The remaining license forms are recorded verbatim.
            for rgx in (license_rgx2, license_rgx3, license_rgx4,
                        license_rgx5):
                match = rgx.search(line)
                if match:
                    licenses.add(match.group(1))

    return (packages, licenses)
def ComparePkgLists(pkg_list1, pkg_list2):
    """Compare the package list in 2 dictionaries and output the differences.

    Removed packages are printed first, then added packages, then packages
    present in both whose version differs; each group is sorted by name.

    Args:
        pkg_list1: dict from GetPackagesLicensesFromHtml.
        pkg_list2: dict from GetPackagesLicensesFromHtml.

    Returns:
        N/A (outputs result on stdout).
    """
    old_names = set(pkg_list1)
    new_names = set(pkg_list2)

    for removed_package in sorted(old_names - new_names):
        print('Package removed: %s-%s' % (
            removed_package, pkg_list1[removed_package]))

    for added_package in sorted(new_names - old_names):
        print('Package added: %s-%s' % (
            added_package, pkg_list2[added_package]))

    # Packages present in both files are only reported on a version change.
    for changed_package in sorted(old_names & new_names):
        ver1 = pkg_list1[changed_package]
        ver2 = pkg_list2[changed_package]
        if ver1 != ver2:
            print('Package updated: %s from %s to %s' % (
                changed_package, ver1, ver2))
def CompareLicenseSets(set1, set2):
    """Compare the license list in 2 sets and output the differences.

    Removed licenses are printed first, then added ones; each group is
    sorted.

    Args:
        set1: set from GetPackagesLicensesFromHtml.
        set2: set from GetPackagesLicensesFromHtml.

    Returns:
        N/A (outputs result on stdout).
    """
    for removed_license in sorted(set1 - set2):
        # Explicit 1-tuple so the value is always treated as a single
        # format argument.
        print('License removed: %s' % (removed_license,))

    for added_license in sorted(set2 - set1):
        print('License added: %s' % (added_license,))
def main(args):
    """Parse the command line and print package and license differences.

    Args:
        args: command line arguments, handed to the parser's parse_args().
            Two positionals are expected: the old and the new html file.
    """
    parser = commandline.ArgumentParser(usage=__doc__)
    parser.add_argument('html1', metavar='license1.html', type='path',
                        help='old html file')
    parser.add_argument('html2', metavar='license2.html', type='path',
                        help='new html file')
    opts = parser.parse_args(args)

    # Each call returns a (packages dict, licenses set) tuple.
    pkg_list1 = GetPackagesLicensesFromHtml(opts.html1)
    pkg_list2 = GetPackagesLicensesFromHtml(opts.html2)

    ComparePkgLists(pkg_list1[0], pkg_list2[0])
    CompareLicenseSets(pkg_list1[1], pkg_list2[1])