| # Copyright 2022 Google LLC |
| # |
| # This program is free software; you can redistribute it and/or |
| # modify it under the terms of the GNU General Public License |
| # version 2 as published by the Free Software Foundation. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| |
| # This script is used to parse licenses of a package. |
| |
| import re |
| import os |
| from chromite.lib import cros_build_lib |
| from chromite.licensing import licenses_lib |
| from sbom_info_lib import license_data |
| |
| |
# Translation table applied by parse_license_yaml() to the license names
# read from license.yaml.  Names without an entry here are passed through
# unchanged.
LICENSE_MAP = {
    "Artistic-2": "Artistic-2.0",
    "BSD-2": "BSD-2-Clause",
    "BSD-4": "BSD-4-Clause",
    "BSD-Google": "BSD-3-Clause",
    "BZIP2": "bzip2-1.0.6",
    "Boost-1.0": "BSL-1.0",
    "FDL-1.1": "GFDL-1.1-only",
    "FDL-1.2": "GFDL-1.2-only",
    "GPL-2": "GPL-2.0-only",
    "GPL-2.0": "GPL-2.0-only",
    "GPL-2+": "GPL-2.0-or-later",
    "GPL-3": "GPL-3.0-only",
    "GPL-3.0": "GPL-3.0-only",
    "GPL-3+": "GPL-3.0-or-later",
    "LGPL-2": "LGPL-2.0-only",
    "LGPL-2.1": "LGPL-2.1-only",
    "LGPL-2.1+": "LGPL-2.1-or-later",
    "LGPL-3": "LGPL-3.0-only",
    "LGPL-3+": "LGPL-3.0-or-later",
    "PSF-2": "PSF-2.0",
    "RSA": "RSA-MD",
    "UoI-NCSA": "NCSA",
    "ZLIB": "Zlib",
    "ZPL": "ZPL-2.1",
    "openssl": "OpenSSL",
    "vim": "Vim",
    # Used by sys-apps/cavium-n3fips-tools in private board.
    "LICENSE.Cavium_Networks": "BSD-3-Clause",
}
| |
| |
# Patterns matched against a file's basename to decide whether it is a
# license file.  list_all_license_files() applies them with re.IGNORECASE.
LICENSE_NAMES_REGEX = [
    r"^copyright$",
    r"^copyright[.]txt$",
    r"^copyright[.]regex$",  # llvm
    r"^copying.*$",
    r"^licen[cs]e.*$",
    r"^licensing.*$",  # libatomic_ops
    r"^ipa_font_license_agreement_v1[.]0[.]txt$",  # ja-ipafonts
    r"^MIT-LICENSE$",  # rake
    # Copyright assignment for some python packages (netifaces, unittest2).
    r"^PKG-INFO$",
    r"^NOTICE$",  # mit-krb5
]
| |
# Template for naming a license that extract_other_licenses() does not find
# in license_data.SPDX_LICENSES.
LICENSE_REF = "LicenseRef-{}"


# Phrases that find_bsd_version() searches for in a license text.  A hit at
# index i is reported as "BSD-<i>-Clause"; index 0 is an unused placeholder
# so that the index lines up with the clause count.
BSD_VERSION_IDENTIFIER = [
    "",
    "Redistributions of source code must retain",
    "Redistributions in binary form must reproduce",
    "endorse or promote products derived",
    "All advertising materials mentioning features",
]
| |
# Patterns applied to the text of license.yaml by parse_license_yaml(),
# always with re.DOTALL so that "." also spans newlines.
#
# They are raw strings on purpose: "\{", "\[" and "\]" are regex escapes,
# not Python string escapes, and spelling them in a normal string literal
# triggers an invalid-escape warning.  In a raw string "\n" reaches the
# regex engine as the two characters backslash-n, which it interprets as a
# newline, so the patterns match exactly the same text as before.
#
# NOTE(review): the whitespace after "\n" is reproduced as found in this
# file; confirm it matches the indentation actually used in license.yaml.
REGEX_LICENSE_NAME = r"- license_names\n - !!set \{(.*?)\}"
REGEX_LICENSE_TEXT = r"- license_text_scanned\n - \[(.*?)\]\n- !!python/tuple"
REGEX_LICENSE_FILE_NAME = r"Scanned Source License ([^\n]*?):"
| |
| |
# License names that extract_other_licenses() handles specially: when one of
# these names is present it is turned into a LicenseRef-<name> reference and
# the associated license_data record is added to the list of other licenses.
SPECIAL_LICENSE_MAP = {
    "lsof": license_data.OTHER_LICENSE_LSOF,
    "netcat": license_data.OTHER_LICENSE_NETCAT,
}
| |
| |
class UnknownLicenseError(Exception):
    """Raised when a license cannot be resolved to a name, text or version."""

    def __init__(self, msg):
        # Exactly one message argument is accepted and forwarded unchanged.
        super().__init__(msg)
| |
| |
def parse_license_yaml(yaml, pkg_name):
    """Pull license names and scanned license text out of license.yaml.

    Args:
        yaml: Text content of a license.yaml file.
        pkg_name: Name used as a stand-in license name when the yaml lists
            no usable license but does contain scanned license files.

    Returns:
        A tuple (names, scanned_text, scanned_files):
        names is the list of license names after LICENSE_MAP translation,
        with empty names, "public-domain" and "metapackage" left out;
        scanned_text is the last non-empty scanned license text, stripped,
        or "" if there is none; scanned_files lists the file names found in
        the "Scanned Source License <file>:" headers of all non-empty
        scanned texts.
    """
    # Collect the scanned license text and the files it came from.
    scanned_text = ""
    scanned_files = []
    for raw_text in re.findall(REGEX_LICENSE_TEXT, yaml, re.DOTALL):
        stripped_text = raw_text.strip()
        if not stripped_text:
            continue
        scanned_text = stripped_text
        scanned_files.extend(
            file_name.strip()
            for file_name in re.findall(
                REGEX_LICENSE_FILE_NAME, raw_text, re.DOTALL
            )
        )

    # Collect the license names.  Being in the public domain is not a
    # license, so that entry is dropped together with "metapackage".
    not_a_license = ("", "public-domain", "metapackage")
    names = []
    for name_set in re.findall(REGEX_LICENSE_NAME, yaml, re.DOTALL):
        for entry in name_set.replace("\n", " ").split(","):
            name = entry.split(":")[0].strip()
            if name in not_a_license:
                continue
            names.append(LICENSE_MAP.get(name, name))

    # There are cases where license.yaml contains no license but only
    # scanned license text, e.g. dev-libs/libpcre.
    if not names and scanned_files:
        names.append(pkg_name)
    return names, scanned_text, scanned_files
| |
| |
def extract_other_licenses(
    licenses, src_path, saved_scanned_txt, saved_license_files
):
    """Rewrite non-standard license names and collect their license texts.

    Every entry of ``licenses`` that is not in license_data.SPDX_LICENSES is
    replaced in place by a "LicenseRef-<name>" reference, and a record
    holding its text is added to the returned list.  After that the generic
    names "BSD" and "OPENLDAP" are replaced by versioned identifiers, and
    names listed in SPECIAL_LICENSE_MAP are replaced by references whose
    records come from that map.

    Args:
        licenses: List of license names; modified in place.
        src_path: Root of the source tree searched for license files.
        saved_scanned_txt: Scanned license text from license.yaml, or "".
        saved_license_files: File names belonging to saved_scanned_txt.

    Returns:
        The list of other-license records.  Records created here have the
        form::

            {
                "license_name": <license reference>,
                "file_names": [<file name>, ...],
                "license_txt": <license text>,
            }

        Records for special licenses are the SPECIAL_LICENSE_MAP values.

    Raises:
        UnknownLicenseError: If no text can be found for a non-SPDX license,
            if the BSD version cannot be determined, or if "OPENLDAP" is
            present but the scanned text does not mention "krb5".
    """
    # NOTE(review): the BSD, OPENLDAP and special-license steps below only
    # see names that survived the first loop, i.e. names contained in
    # license_data.SPDX_LICENSES -- confirm that this is intended.
    other_license_list = []
    all_license_files = list_all_license_files(src_path)

    # enumerate() gives the slot to overwrite directly, so the list is not
    # searched again while it is being iterated.
    for position, name in enumerate(licenses):
        if name in license_data.SPDX_LICENSES:
            continue
        # Find license files same as license names
        # e.g. LICENCE.ralink-firmware.txt.
        license_txt, license_file_names = find_license_file(
            src_path, all_license_files, name
        )
        if not license_txt:
            if not saved_scanned_txt:
                # Find general license files e.g. COPYING.
                (
                    saved_scanned_txt,
                    saved_license_files,
                ) = find_general_license_txt(src_path, all_license_files)
            if not saved_scanned_txt:
                raise UnknownLicenseError(
                    f"unknown license without scanned text: {name}"
                )
            license_txt = saved_scanned_txt
            license_file_names = saved_license_files

        # Mark licenses not listed by SPDX spec as license reference.
        license_ref = LICENSE_REF.format(name)
        other_license_list.append(
            {
                "license_name": license_ref,
                "file_names": license_file_names,
                "license_txt": license_txt,
            }
        )
        licenses[position] = license_ref

    # Identify BSD version.
    if "BSD" in licenses:
        if not saved_scanned_txt:
            # The list of candidate files must be passed along; calling
            # find_general_license_txt() with src_path alone is a TypeError.
            saved_scanned_txt, _ = find_general_license_txt(
                src_path, all_license_files
            )
        if not saved_scanned_txt:
            raise UnknownLicenseError(
                "no license file found, cannot identify BSD version"
            )
        licenses[licenses.index("BSD")] = find_bsd_version(saved_scanned_txt)

    # Identify OPENLDAP version.
    if "OPENLDAP" in licenses:
        # Currently, OPENLDAP is only used in mit-krb5.
        if "krb5" not in saved_scanned_txt:
            raise UnknownLicenseError("license OPENLDAP doesn't have a version")
        licenses[licenses.index("OPENLDAP")] = "OLDAP-2.8"

    # Handle special licenses if present.
    for special_name, special_record in SPECIAL_LICENSE_MAP.items():
        if special_name in licenses:
            licenses[licenses.index(special_name)] = LICENSE_REF.format(
                special_name
            )
            other_license_list.append(special_record)
    return other_license_list
| |
| |
def find_bsd_version(saved_scanned_txt):
    """Work out which BSD variant a license text describes.

    The phrases in BSD_VERSION_IDENTIFIER are tried from the last index down
    to index 1, and the first one contained in the text decides the result.

    Args:
        saved_scanned_txt: License text to inspect.

    Returns:
        "BSD-<i>-Clause", where i is the index of the matching phrase.

    Raises:
        UnknownLicenseError: If none of the phrases occurs in the text.
    """
    # Index 0 is the empty placeholder and is deliberately never tested.
    for clause_count in range(len(BSD_VERSION_IDENTIFIER) - 1, 0, -1):
        if BSD_VERSION_IDENTIFIER[clause_count] in saved_scanned_txt:
            return f"BSD-{clause_count}-Clause"
    raise UnknownLicenseError("failed to identify BSD version")
| |
| |
def list_all_license_files(src_path):
    """List the files under src_path whose names look like license files.

    Runs ``find <src_path> -type f`` through cros_build_lib.run() and keeps
    the files whose basename matches one of LICENSE_NAMES_REGEX, ignoring
    case.

    Args:
        src_path: Directory to search.

    Returns:
        A list of paths with the src_path prefix and leading slashes
        removed, e.g. "swig-2.0.4/COPYRIGHT".
    """
    find_cmd = ["find", src_path, "-type", "f"]
    find_result = cros_build_lib.run(find_cmd, stdout=True, encoding="utf-8")
    prefix_length = len(src_path)

    license_files = []
    for found_path in find_result.stdout.splitlines():
        # Truncate results to look like this: swig-2.0.4/COPYRIGHT
        relative_path = found_path[prefix_length:].lstrip("/")
        if ".git/" in relative_path:
            continue
        basename = os.path.basename(relative_path)
        # Looking for license.* brings up things like license.gpl, and we
        # never want a GPL license when looking for copyright attribution,
        # so we skip them here.  We also skip names ending in ".py", which
        # the patterns would otherwise accept (license.py, seen in some
        # code).
        if "GPL" in basename or re.search(r"\.py$", basename):
            continue
        if any(
            re.search(pattern, basename, re.IGNORECASE)
            for pattern in LICENSE_NAMES_REGEX
        ):
            license_files.append(relative_path)
    return license_files
| |
| |
def find_license_file(src_path, license_files, license):
    """Find and read the license file that is named after a license.

    A file matches when its basename, with or without its last extension,
    equals the license name, e.g. LICENCE.ralink-firmware.txt in
    sys-kernel/linux-firmware.

    Args:
        src_path: Directory that the entries of license_files are relative
            to.
        license_files: Candidate license file paths.
        license: License name to look for.

    Returns:
        A tuple (text, [path]) for the first matching file, where text is
        read with licenses_lib.ReadUnknownEncodedFile(), or ("", []) if no
        file matches.
    """
    for candidate in license_files:
        basename = os.path.basename(candidate)
        stem = os.path.splitext(basename)[0]
        if license not in (stem, basename):
            continue
        license_txt = licenses_lib.ReadUnknownEncodedFile(
            os.path.join(src_path, candidate), "Adding License"
        )
        return license_txt, [candidate]
    return "", []
| |
| |
def find_general_license_txt(src_path, license_files):
    """Concatenate the text of general license files, e.g. COPYING.

    Partially copy-pasted from
    chromite.licensing.licenses_lib._ExtractLicenses().

    Args:
        src_path: Directory that the entries of license_files are relative
            to.
        license_files: Paths of the license files to read.

    Returns:
        A tuple (text, license_files).  text holds one section per file, in
        sorted file order, each introduced by a
        "Scanned Source License <file>:" header; it is "" when there are no
        files.  license_files is the argument itself, in its original order.
    """
    sections = []
    for license_file in sorted(license_files):
        # Joy and pink ponies.  Some license files are encoded as latin1
        # while others are utf-8, and of course you can't know but only
        # guess, so reading is left to ReadUnknownEncodedFile().
        license_txt = licenses_lib.ReadUnknownEncodedFile(
            os.path.join(src_path, license_file), "Adding License"
        )
        sections.append(
            "\n\nScanned Source License %s:\n\n%s" % (license_file, license_txt)
        )
    return "".join(sections), license_files
| |
| |
def get_licenses(build_info_dir, src_path, pkg_name):
    """Produce the license expression and extra license records of a package.

    Args:
        build_info_dir: Directory expected to contain license.yaml.
        src_path: Root of the package source tree, searched for license
            files.
        pkg_name: Package name, passed on to parse_license_yaml().

    Returns:
        A tuple (expression, other_license_list), where expression is the
        license names joined with " AND " and other_license_list is the
        result of extract_other_licenses().  If license.yaml does not
        exist, the bare string "" is returned instead.

        NOTE(review): the missing-file result is not a 2-tuple, so a caller
        that unpacks two values will fail on it -- confirm what callers
        expect before changing either side.
    """
    yaml_path = os.path.join(build_info_dir, "license.yaml")
    if not os.path.exists(yaml_path):
        return ""

    with open(yaml_path, "r") as yaml_file:
        yaml_text = yaml_file.read()
    licenses, saved_scanned_txt, saved_license_files = parse_license_yaml(
        yaml_text, pkg_name
    )

    # extract_other_licenses() also rewrites entries of `licenses` in place,
    # so the expression has to be built after it has run.
    other_license_list = extract_other_licenses(
        licenses, src_path, saved_scanned_txt, saved_license_files
    )
    return " AND ".join(licenses), other_license_list