blob: d430a0a486df6bbb6ba223cc8d7be432a1f0a9d9 [file] [log] [blame]
# Copyright 2022 Google LLC
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# This script is used to parse licenses of a package.
import re
import os
from chromite.lib import cros_build_lib
from chromite.licensing import licenses_lib
from sbom_info_lib import license_data
# Translation table applied by parse_license_yaml(): a license name found in
# license.yaml (key) is replaced by the name used in the rest of this module
# (value). Names that are not listed here are kept unchanged.
LICENSE_MAP = {
    "Artistic-2": "Artistic-2.0",
    "BSD-2": "BSD-2-Clause",
    "BSD-4": "BSD-4-Clause",
    "BSD-Google": "BSD-3-Clause",
    "BZIP2": "bzip2-1.0.6",
    "Boost-1.0": "BSL-1.0",
    "FDL-1.1": "GFDL-1.1-only",
    "FDL-1.2": "GFDL-1.2-only",
    "GPL-2": "GPL-2.0-only",
    "GPL-2.0": "GPL-2.0-only",
    "GPL-2+": "GPL-2.0-or-later",
    "GPL-3": "GPL-3.0-only",
    "GPL-3.0": "GPL-3.0-only",
    "GPL-3+": "GPL-3.0-or-later",
    "LGPL-2": "LGPL-2.0-only",
    "LGPL-2.1": "LGPL-2.1-only",
    "LGPL-2.1+": "LGPL-2.1-or-later",
    "LGPL-3": "LGPL-3.0-only",
    "LGPL-3+": "LGPL-3.0-or-later",
    "PSF-2": "PSF-2.0",
    "RSA": "RSA-MD",
    "UoI-NCSA": "NCSA",
    "ZLIB": "Zlib",
    "ZPL": "ZPL-2.1",
    "openssl": "OpenSSL",
    "vim": "Vim",
    # Used by sys-apps/cavium-n3fips-tools in private board.
    "LICENSE.Cavium_Networks": "BSD-3-Clause",
}
# File-name patterns that identify a license file. list_all_license_files()
# tries each pattern, case-insensitively, against the basename of every file
# in the source tree and keeps the file on the first hit.
LICENSE_NAMES_REGEX = [
    r"^copyright$",
    r"^copyright[.]txt$",
    # llvm
    r"^copyright[.]regex$",
    r"^copying.*$",
    r"^licen[cs]e.*$",
    # libatomic_ops
    r"^licensing.*$",
    # ja-ipafonts
    r"^ipa_font_license_agreement_v1[.]0[.]txt$",
    # rake
    r"^MIT-LICENSE$",
    # copyright assignment for some python packages (netifaces, unittest2)
    r"^PKG-INFO$",
    # mit-krb5
    r"^NOTICE$",
]
# Template for the name given to a license that is not in
# license_data.SPDX_LICENSES; the original name fills the placeholder.
LICENSE_REF = "LicenseRef-{}"

# Marker phrases consulted by find_bsd_version(). The list index is the
# clause count reported ("BSD-<index>-Clause"); the list is searched from the
# highest index down, and index 0 is a placeholder that is never tested.
BSD_VERSION_IDENTIFIER = [
    "",
    "Redistributions of source code must retain",
    "Redistributions in binary form must reproduce",
    "endorse or promote products derived",
    "All advertising materials mentioning features",
]
# Patterns applied by parse_license_yaml() to the raw text of license.yaml
# (always with re.DOTALL). They are raw strings so that "\{", "\[" etc. reach
# the regex engine as written instead of being invalid string escapes.
# Whitespace inside the patterns is matched literally.
#
# Captures the body of the "!!set {...}" that follows "- license_names".
REGEX_LICENSE_NAME = r"- license_names\n - !!set \{(.*?)\}"
# Captures the body of the "[...]" list that follows "- license_text_scanned".
REGEX_LICENSE_TEXT = r"- license_text_scanned\n - \[(.*?)\]\n- !!python/tuple"
# Captures the file name from a "Scanned Source License <file>:" header, as
# written by find_general_license_txt().
REGEX_LICENSE_FILE_NAME = r"Scanned Source License ([^\n]*?):"
# License names whose "other license" entry is predefined in license_data.
# extract_other_licenses() appends the mapped value as-is and rewrites the
# name with LICENSE_REF.
SPECIAL_LICENSE_MAP = {
    "lsof": license_data.OTHER_LICENSE_LSOF,
    "netcat": license_data.OTHER_LICENSE_NETCAT,
}
class UnknownLicenseError(Exception):
    """Raised when a license name cannot be resolved by this module."""

    def __init__(self, msg):
        """Store `msg` as the exception message."""
        super().__init__(msg)
def parse_license_yaml(yaml, pkg_name):
    """Extract license names and scanned license text from license.yaml.

    Args:
        yaml: Full text content of a license.yaml file.
        pkg_name: Package name, used as the license name when only scanned
            license files (and no license names) are present.

    Returns:
        A tuple (names, scanned_text, scanned_files):
        names is the list of license names after LICENSE_MAP translation,
        scanned_text is the last non-blank scanned-text section (stripped),
        scanned_files lists the file names found in every non-blank section.
    """
    # Scanned license text and the files it was collected from.
    saved_scanned_txt = ""
    saved_license_files = []
    for section in re.findall(REGEX_LICENSE_TEXT, yaml, re.DOTALL):
        stripped_section = section.strip()
        if not stripped_section:
            continue
        saved_scanned_txt = stripped_section
        saved_license_files.extend(
            file_name.strip()
            for file_name in re.findall(
                REGEX_LICENSE_FILE_NAME, section, re.DOTALL
            )
        )

    # License names. Being in the public domain is not a license, and
    # neither is "metapackage"; blank entries are dropped too.
    ignored_names = ("", "public-domain", "metapackage")
    found = []
    for name_set in re.findall(REGEX_LICENSE_NAME, yaml, re.DOTALL):
        for entry in name_set.replace("\n", " ").split(","):
            name = entry.partition(":")[0].strip()
            if name in ignored_names:
                continue
            found.append(LICENSE_MAP.get(name, name))

    # There are cases where license.yaml contains no license
    # but only scanned license text e.g. dev-libs/libpcre.
    if not found and saved_license_files:
        found.append(pkg_name)
    return found, saved_scanned_txt, saved_license_files
def extract_other_licenses(
    licenses, src_path, saved_scanned_txt, saved_license_files
):
    """Resolve license names that are not plain SPDX identifiers.

    Every name in `licenses` that is not in license_data.SPDX_LICENSES is
    replaced, in place, by its LICENSE_REF form and gets an entry with its
    license text in the returned list. Afterwards the versionless names
    "BSD" and "OPENLDAP" are replaced by a versioned name and the names in
    SPECIAL_LICENSE_MAP by their LICENSE_REF form.

    Args:
        licenses: List of license names; modified in place.
        src_path: Root directory of the package sources.
        saved_scanned_txt: Scanned license text from license.yaml, may be "".
        saved_license_files: File names belonging to saved_scanned_txt.

    Returns:
        A list of entries in this format, followed by the SPECIAL_LICENSE_MAP
        values of any special license found:
        [
            {
                "license_name": <license_name>,
                "file_names": [file_name],
                "license_txt": <license_txt>
            },
            ...
        ]

    Raises:
        UnknownLicenseError: No license text can be found for a non-SPDX
            license, the BSD version cannot be identified, or OPENLDAP is
            used without "krb5" appearing in the scanned text.
    """
    other_license_list = []
    all_license_files = list_all_license_files(src_path)

    for idx, name in enumerate(licenses):
        if name in license_data.SPDX_LICENSES:
            continue
        # Find license files same as license names
        # e.g. LICENCE.ralink-firmware.txt.
        license_txt, license_file_names = find_license_file(
            src_path, all_license_files, name
        )
        if not license_txt:
            if not saved_scanned_txt:
                # Find general license files e.g. COPYING.
                (
                    saved_scanned_txt,
                    saved_license_files,
                ) = find_general_license_txt(src_path, all_license_files)
            if not saved_scanned_txt:
                raise UnknownLicenseError(
                    f"unknown license without scanned text: {name}"
                )
            license_txt = saved_scanned_txt
            license_file_names = saved_license_files
        # Mark licenses not listed by SPDX spec as license reference.
        license_ref = LICENSE_REF.format(name)
        other_license_list.append(
            {
                "license_name": license_ref,
                "file_names": license_file_names,
                "license_txt": license_txt,
            }
        )
        # Write back by position so that an earlier, already rewritten entry
        # with the same text can never be hit instead of this one.
        licenses[idx] = license_ref

    # NOTE(review): the loop above rewrites every name that is not in
    # license_data.SPDX_LICENSES, so the checks below only fire for names
    # listed there -- confirm that "BSD", "OPENLDAP" and the
    # SPECIAL_LICENSE_MAP keys are.

    # Identify BSD version.
    if "BSD" in licenses:
        if not saved_scanned_txt:
            # find_general_license_txt() needs the candidate file list too.
            saved_scanned_txt, _ = find_general_license_txt(
                src_path, all_license_files
            )
        if not saved_scanned_txt:
            raise UnknownLicenseError(
                "no license file found, cannot identify BSD version"
            )
        licenses[licenses.index("BSD")] = find_bsd_version(saved_scanned_txt)

    # Identify OPENLDAP version.
    if "OPENLDAP" in licenses:
        # Currently, OPENLDAP is only used in mit-krb5.
        if "krb5" not in saved_scanned_txt:
            raise UnknownLicenseError("license OPENLDAP doesn't have a version")
        licenses[licenses.index("OPENLDAP")] = "OLDAP-2.8"

    # Handle special licenses if present.
    for special_name, license_content in SPECIAL_LICENSE_MAP.items():
        if special_name in licenses:
            licenses[licenses.index(special_name)] = LICENSE_REF.format(
                special_name
            )
            other_license_list.append(license_content)
    return other_license_list
def find_bsd_version(saved_scanned_txt):
    """Return the BSD variant name matching the given license text.

    The phrases in BSD_VERSION_IDENTIFIER are tried from the highest index
    down to index 1; the first phrase contained in the text decides the
    result.

    Args:
        saved_scanned_txt: License text to inspect.

    Returns:
        "BSD-<i>-Clause", where <i> is the index of the matching phrase.

    Raises:
        UnknownLicenseError: None of the phrases occurs in the text.
    """
    for clause_count in range(len(BSD_VERSION_IDENTIFIER) - 1, 0, -1):
        if BSD_VERSION_IDENTIFIER[clause_count] in saved_scanned_txt:
            return f"BSD-{clause_count}-Clause"
    raise UnknownLicenseError("failed to identify BSD version")
def list_all_license_files(src_path):
    """List the files under src_path whose names look like license files.

    Runs `find <src_path> -type f` and keeps each file whose basename
    matches one of LICENSE_NAMES_REGEX (case-insensitively).

    Args:
        src_path: Root directory to search.

    Returns:
        Paths of the matching files with the src_path prefix and leading
        slashes removed, e.g. swig-2.0.4/COPYRIGHT.
    """
    find_cmd = ["find", src_path, "-type", "f"]
    result = cros_build_lib.run(find_cmd, stdout=True, encoding="utf-8")

    prefix_len = len(src_path)
    license_files = []
    for found_path in result.stdout.splitlines():
        # Truncate results to look like this: swig-2.0.4/COPYRIGHT
        rel_path = found_path[prefix_len:].lstrip("/")
        if ".git/" in rel_path:
            continue
        basename = os.path.basename(rel_path)
        # Looking for license.* brings up things like license.gpl, and we
        # never want a GPL license when looking for copyright attribution,
        # so we skip them here. We also skip regexes that can return
        # license.py (seen in some code).
        if re.search(r".*GPL.*", basename) or re.search(r"\.py$", basename):
            continue
        if any(
            re.search(pattern, basename, re.IGNORECASE)
            for pattern in LICENSE_NAMES_REGEX
        ):
            license_files.append(rel_path)
    return license_files
def find_license_file(src_path, license_files, license):
    """Read the first license file that is named after the license.

    A file matches when its basename, with or without its last extension,
    equals the license name, e.g. LICENCE.ralink-firmware.txt in
    sys-kernel/linux-firmware.

    Args:
        src_path: Root directory that the entries of license_files are
            relative to.
        license_files: Candidate license file paths.
        license: License name to look for.

    Returns:
        A tuple (license_text, [file_name]) for the first match, or
        ("", []) when no file matches.
    """
    for rel_path in license_files:
        file_name = os.path.basename(rel_path)
        stem = os.path.splitext(file_name)[0]
        if license not in (stem, file_name):
            continue
        license_txt = licenses_lib.ReadUnknownEncodedFile(
            os.path.join(src_path, rel_path), "Adding License"
        )
        return license_txt, [rel_path]
    return "", []
def find_general_license_txt(src_path, license_files):
    """Concatenate the text of all given license files, e.g. COPYING.

    Partially copy-pasted from
    chromite.licensing.licenses_lib._ExtractLicenses().

    Args:
        src_path: Root directory that the entries of license_files are
            relative to.
        license_files: License file paths to read.

    Returns:
        A tuple (text, license_files). text holds one section per file, in
        sorted file order, each introduced by a "Scanned Source License
        <file>:" header; license_files is the argument, returned unchanged.
    """
    sections = []
    for license_file in sorted(license_files):
        # Joy and pink ponies. Some license_files are encoded as latin1 while
        # others are utf-8 and of course you can't know but only guess.
        license_txt = licenses_lib.ReadUnknownEncodedFile(
            os.path.join(src_path, license_file), "Adding License"
        )
        sections.append(
            "\n\nScanned Source License %s:\n\n%s" % (license_file, license_txt)
        )
    return "".join(sections), license_files
def get_licenses(build_info_dir, src_path, pkg_name):
    """Build the license expression of a package from its license.yaml.

    Args:
        build_info_dir: Directory expected to contain license.yaml.
        src_path: Root directory of the package sources.
        pkg_name: Package name, forwarded to parse_license_yaml().

    Returns:
        "" when license.yaml does not exist; otherwise a tuple
        (expression, other_license_list), where expression is the license
        names joined with " AND " and other_license_list is the result of
        extract_other_licenses().

    NOTE(review): the two return shapes differ (str vs. 2-tuple); callers
    that unpack the result will fail on the missing-file path -- confirm
    against the callers before unifying.
    """
    yaml_path = os.path.join(build_info_dir, "license.yaml")
    if not os.path.exists(yaml_path):
        return ""

    with open(yaml_path, "r") as yaml_file:
        yaml_text = yaml_file.read()

    licenses, saved_scanned_txt, saved_license_files = parse_license_yaml(
        yaml_text, pkg_name
    )
    # extract_other_licenses() also rewrites `licenses` in place, so it must
    # run before the names are joined.
    other_license_list = extract_other_licenses(
        licenses, src_path, saved_scanned_txt, saved_license_files
    )
    expression = " AND ".join(licenses)
    return expression, other_license_list