# Copyright 2022 Google LLC
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#

# get_download_url() in this script finds the download location
# of a COS package.

import subprocess
import re
import os
import requests


# GCS buckets probed, in this order, by search_mirror_gcs() with `gsutil ls`.
CROS_GCS_MIRRORS = [
    "gs://chromeos-mirror/gentoo/distfiles/",
    "gs://chromeos-localmirror/distfiles/",
]
# Substring looked for in gsutil's stderr to recognise a missing object.
GCS_FILE_NOT_FOUND = "One or more URLs matched no objects"
# An allow-list for variables parsed in an ebuild file: find_var_and_set_env()
# only exports assignments whose name is in this set to os.environ.
EBUILD_VARS = {
    "MY_P",
    "MY_PV",
    "MY_PN",
    "PARCH",
    "SRC_PV",
    "code_ver",
    "RE2_VER",
    "MODULE_VERSION",
    "GIT_COMMIT",
    "SRC_URI",
    "EGIT_REPO_URI",
    "EGIT_COMMIT",
    "CROS_WORKON_COMMIT",
    "CROS_WORKON_PROJECT",
    "CROS_WORKON_SUBTREE",
    "HOMEPAGE",
    "CROS_GO_SOURCE",
    "GN_X64_SHA1",
    "LLVM_HASH",
    "CROS_WORKON_REPO",
    "GNOME_ORG_MODULE",
    "GRUB2_COMMIT",
    "MY_CODE_VER",
    "MY_DATA_VER",
}
# For packages whose package names are hard to parse or not defined in ebuilds.
# Maps the parsed package name to a callable that receives the package version
# string and returns the name parse_pkg_name() should use for PN and P instead.
PN_REPLACE_DICT = {
    "Locale-gettext": lambda x: "gettext",
    "systemd": lambda x: "systemd-stable" if "." in x else "systemd",
    "perf": lambda x: "patch",
    "gcc-libs": lambda x: "gcc",
}
# Environment variables tried, in order, by search_src_uri() as the source URI.
SRC_URI_VARS = ["SRC_URI", "EGIT_REPO_URI"]
# Environment variables tried, in order, by search_src_uri() for a commit to
# append to the source URI as `@commit`.
COMMIT_VARS = ["GIT_COMMIT", "EGIT_COMMIT", "LLVM_HASH"]
# All patterns below are raw strings so that regex escapes such as `\(`, `\.`
# or `\$` reach the regex engine unchanged instead of being treated as
# (invalid) Python string escape sequences.
# Patterns described as "allow new lines" rely on the caller passing re.DOTALL.
# REGEX_STRING_VAR finds `var_name=var_value` and `var_name="var_value"` (no new line) in ebuilds.
REGEX_STRING_VAR = r'([^\n]*?)="?([^\n]*?)"?\n'
# REGEX_ARRAY_VAR finds `var_name=("var_value1" "var_value2" ...)` (allow new lines) in ebuilds.
REGEX_ARRAY_VAR = r"([^\n]*?)=(\(.*?\))"
# REGEX_SRC_URI finds `SRC_URI="uri1 uri2 ..."` (allow new lines) in ebuilds.
REGEX_SRC_URI = r'SRC_URI="(.*?)"'
# REGEX_SRC_URI_PLUS finds `SRC_URI+="uri1 uri2 ..."` (allow new lines) in ebuilds.
REGEX_SRC_URI_PLUS = r'SRC_URI\+="(.*?)"'
# REGEX_PKG_REVISION finds package revision like `-r12` in package full name.
REGEX_PKG_REVISION = r"-r[0-9]+$"
# REGEX_PKG_VERSION finds package version like `-1` or `-1.2.3.4` in package full name.
REGEX_PKG_VERSION = r"-[0-9]+(\.[0-9]*)*"
# REGEX_FIND_STRING finds string inside double quotes like "string1".
REGEX_FIND_STRING = r'"(.*?)"'
# REGEX_EBUILD_REPLACE finds ebuild replacement string `$(ver_rs 1- some_string)`.
REGEX_EBUILD_REPLACE = r"\$\(ver_rs 1- (.*?)\)"
# REGEX_INLINE_COMMENT finds a trailing ` # comment` (space, hash, rest of the
# line) that is followed by a new line; the new line itself is not captured.
REGEX_INLINE_COMMENT = r"( #[^\n]*?)\n"
# Names of the environment variables (exported from the ebuild by
# find_var_and_set_env) that the search_*/find_* helpers read back.
GNOME_PN = "GNOME_ORG_MODULE"
GO_SOURCE = "CROS_GO_SOURCE"
CROS_REPO = "CROS_WORKON_REPO"
CROS_COMMIT = "CROS_WORKON_COMMIT"
LLVM_COMMIT = "LLVM_HASH"
CROS_PROJECT = "CROS_WORKON_PROJECT"
CROS_SUBTREE = "CROS_WORKON_SUBTREE"
# Git hosts. Both are exported to the environment by get_download_url();
# CROS_GIT_HOST_URL is also the repo used by find_cros_uri() when
# CROS_WORKON_REPO is not set.
CROS_GIT_HOST_URL = "https://chromium.googlesource.com"
CROS_GIT_AOSP_URL = "https://android.googlesource.com"
CROS_HOMEPAGE = "HOMEPAGE"
# Repository name -> base URL; search_pkg_from_gob() appends `category/name`.
GOB_REPO_DICT = {
    "project-lakitu": "https://cos.googlesource.com/cos/overlays/board-overlays/+/master/project-lakitu/",
    "chromiumos": "https://cos.googlesource.com/third_party/overlays/chromiumos-overlay/+/master/",
    "portage-stable": "https://cos.googlesource.com/third_party/overlays/portage-stable/+/master/",
    "eclass-overlay": "https://cos.googlesource.com/third_party/overlays/eclass-overlay/+/master/",
}
# Packages that use `MODULE_VERSION` as package version.
# Each keyword is matched as a substring of the ebuild path in parse_ebuild().
KEYWORDS_FOR_MODULE_VERSION = ["dev-perl", "perl-core"]
# `category/name` keys that search_pkg_from_gob() resolves to a URL under
# GOB_REPO_DICT instead of searching the mirrors.
PACKAGES_FROM_GOB = {
    # portage-stable
    "dev-util/meson-format-array",
    "sys-devel/autoconf-wrapper",
    "sys-devel/automake-wrapper",
    "dev-python/namespace-zope",
    "chromeos-base/chromeos-bsp-test-root-lakitu",
    "dev-python/namespace-jaraco",
    "dev-python/namespace-google",
    # project-lakitu
    "app-admin/cgroup-helper",
    "app-admin/extensions-manager",
    "app-admin/kdump-helper",
    "app-admin/stackdriver",
    "app-admin/toolbox-config",
    "app-emulation/cloud-init-config",
    "chromeos-base/chromeos-auth-config-lakitu",
    "chromeos-base/chromeos-base",
    "chromeos-base/chromeos-bsp-lakitu-common",
    "chromeos-base/chromeos-firewall-init-lakitu",
    "chromeos-base/chromeos-init-systemd",
    "chromeos-base/chromeos-init-systemd-dev",
    "chromeos-base/cloud-audit-config",
    "chromeos-base/cloud-filesystem-init",
    "chromeos-base/cloud-network-init",
    "dev-util/cos-dev-tools",
    "net-misc/chrony-config",
    "sys-apps/loadpin-trigger",
    "sys-apps/system-sysdaemons",
    "sys-libs/lakitu-custom-locales",
    "sys-boot/platform-key",
    # chromiumos-overlay
    "chromeos-base/chromeos-ca-certificates",
    "chromeos-base/chromeos-sshd-init",
    "chromeos-base/tty",
    "chromeos-base/update-policy-embedded",
    "dev-util/glib-utils",
    "chromeos-base/openssh-server-init",
    "chromeos-base/autotest-all",
    "chromeos-base/autotest-client",
    "chromeos-base/chromeos-ssh-testkeys",
}


def is_uri_valid(uri, timeout=30):
    """Return True if `uri` is an http(s) URL that answers a GET with status 200.

    Args:
        uri: Candidate URL. Anything that does not start with "http" (after
            stripping whitespace) is rejected without any network access.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the caller forever.

    Returns:
        True when the request succeeds with HTTP 200, False otherwise
        (including connection errors, timeouts and malformed URLs).
    """
    if not uri.strip().startswith("http"):
        return False
    try:
        # stream=True: only the status line/headers are needed, not the body.
        response = requests.get(uri, stream=True, timeout=timeout)
    except Exception:
        # Best-effort probe: any request failure means "not valid". Unlike a
        # bare `except:`, KeyboardInterrupt and SystemExit still propagate.
        return False
    try:
        return response.status_code == 200
    finally:
        # A streamed response keeps its connection checked out until closed.
        response.close()


def parse_var(s):
    """Expand shell variables in `s` by echoing it through bash.

    Only the text before the first `->` is kept (the original author's note:
    avoid downloading packages). A value starting with `(` is single-quoted so
    bash prints it verbatim instead of evaluating it.

    Returns the echoed text with trailing whitespace removed, or "" when bash
    wrote anything to stderr.

    NOTE(review): for values not starting with `(`, bash still performs command
    substitution on `$(...)`/backticks found in `s`; callers must only pass
    text from trusted ebuilds.
    """
    expression = s.partition("->")[0]
    if expression.startswith("("):
        # do not evaluate commands.
        expression = f"'{expression}'"
    completed = subprocess.run(
        ["bash", "-c", f"echo {expression}"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if completed.stderr:
        return ""
    return completed.stdout.decode("utf-8").rstrip()


def parse_var_from_env(key):
    """Read environment variable `key` and return its value(s) as a list.

    Returns [] when the variable is unset or empty, a one-element list for a
    plain value, and one entry per double-quoted string for a value that
    starts with `(` (a bash array literal).
    """
    value = os.getenv(key)
    if not value:
        return []
    if not value.startswith("("):
        return [value]
    # in some cases, go src version cannot be parsed in array
    # e.g. chromiumos-overlay/dev-go/protobuf
    # so every quoted element is expanded again through parse_var().
    quoted_items = re.findall(REGEX_FIND_STRING, value, re.DOTALL)
    return [parse_var(item) for item in quoted_items]


def find_var_and_set_env(regex, content):
    """Export allow-listed `name=value` assignments found in `content`.

    `regex` must capture the variable name in group 1 and its raw value in
    group 2. Names outside EBUILD_VARS are ignored; values are expanded with
    parse_var() and exported to os.environ only when non-empty.

    Returns the set of variable names that were exported.
    """
    exported = set()
    for groups in re.findall(regex, content, re.DOTALL):
        name = groups[0].strip()
        if name in EBUILD_VARS:
            value = parse_var(groups[1]).strip()
            if value:
                os.environ[name] = value
                exported.add(name)
    return exported


def parse_vars_in_ebuild(content):
    """Export the allow-listed variables defined in ebuild text `content`.

    Returns the set of variable names exported to os.environ.
    """
    # Replace ebuild replacement grammar with bash format: `$(ver_rs 1- X)`
    # becomes `${PV//./X}`, which bash can expand.
    for separator in re.findall(REGEX_EBUILD_REPLACE, content, re.DOTALL):
        content = content.replace(
            f"$(ver_rs 1- {separator})", f"${{PV//./{separator}}}"
        )
    # String assignments are exported first, then array assignments.
    exported = find_var_and_set_env(REGEX_STRING_VAR, content)
    exported |= find_var_and_set_env(REGEX_ARRAY_VAR, content)
    return exported


def parse_pkg_name(pf):
    """Split a full package name and export PN, PV and P to the environment.

    A trailing revision (`-r12`) is dropped, then the name is split at the
    first `-<digits>` version marker, e.g. "foo-1.2.3-r4" gives name "foo" and
    version "1.2.3". Names listed in PN_REPLACE_DICT are substituted before
    PN and P are exported.

    Args:
        pf: Full package name including version and optional revision.

    Returns:
        A tuple `(p_name, names)`: the package name as parsed from `pf`
        (before any PN_REPLACE_DICT substitution) and the set of archive file
        names to look for in the mirror buckets.

    Raises:
        ValueError: if `pf` contains no version component.
    """
    revision = re.search(REGEX_PKG_REVISION, pf)
    p = pf[: revision.start()] if revision else pf
    version = re.search(REGEX_PKG_VERSION, p)
    if version is None:
        # Previously this surfaced as an AttributeError on None.
        raise ValueError(f"cannot find a version in package name {pf!r}")
    pn = p[: version.start()]
    p_name = pn
    # Skip the leading "-" of the version match.
    pv = p[version.start() + 1 :]
    if pn in PN_REPLACE_DICT:
        pn = PN_REPLACE_DICT[pn](pv)
        p = f"{pn}-{pv}"
    os.environ["PN"] = pn
    os.environ["PV"] = pv
    os.environ["P"] = p
    # possible package names in CROS GCS mirror buckets.
    return p_name, {f"{p}.tar.gz", f"{p}.tar.xz", f"{p}.tgz", f"{p}.xz"}


def search_pkg_from_gob(repository, category, p_name, license):
    """Return the GoB URL of a package that lives directly in an overlay.

    A package qualifies when `category/p_name` is in PACKAGES_FROM_GOB, the
    category is "virtual", the repository is "eclass-overlay", or the license
    is "metapackage". Returns "" when the package does not qualify, the
    repository is not in GOB_REPO_DICT, or the resulting URL is not reachable.
    """
    pkg = f"{category}/{p_name}"
    hosted_on_gob = (
        pkg in PACKAGES_FROM_GOB
        or category == "virtual"
        or repository == "eclass-overlay"
        or license == "metapackage"
    )
    if not hosted_on_gob or repository not in GOB_REPO_DICT:
        return ""
    uri = os.path.join(GOB_REPO_DICT[repository], pkg)
    return uri if is_uri_valid(uri) else ""


def _cros_value_at(values, index):
    """Return the entry of `values` for project `index`, or "" if there is none.

    A single-element list applies to every project.
    NOTE(review): this is believed to match how cros-workon.eclass auto-fills
    single-valued CROS_WORKON_* variables - confirm against the eclass.
    """
    if len(values) == 1:
        return values[0]
    if index < len(values):
        return values[index]
    return ""


def find_cros_uri():
    """Build download URIs from the CROS_WORKON_* environment variables.

    For each project whose `<repo>/<project>` URL is reachable, emit
    `uri`, `uri@commit`, or one `uri@commit#subtree` per subtree, depending on
    which of CROS_WORKON_COMMIT / CROS_WORKON_SUBTREE are available for it.
    CROS_GIT_HOST_URL is used as the repo when CROS_WORKON_REPO is not set.

    Returns:
        The URIs joined with ",", or "" when there are none.
    """
    cros_repo = parse_var_from_env(CROS_REPO)
    cros_proj = parse_var_from_env(CROS_PROJECT)
    cros_subtree = parse_var_from_env(CROS_SUBTREE)
    cros_commit = parse_var_from_env(CROS_COMMIT)
    if not cros_repo:
        cros_repo = [CROS_GIT_HOST_URL]
    res = []
    for i, project in enumerate(cros_proj):
        uri = os.path.join(_cros_value_at(cros_repo, i), project)
        if not is_uri_valid(uri):
            continue
        # Lists shorter than the project list no longer raise IndexError; a
        # missing commit simply yields the bare repository URI.
        commit = _cros_value_at(cros_commit, i)
        if not commit:
            res.append(uri)
            continue
        # split() without a separator ignores repeated spaces, so no empty
        # `#` fragments are produced.
        subtrees = _cros_value_at(cros_subtree, i).split()
        if subtrees:
            res.extend(f"{uri}@{commit}#{subtree}" for subtree in subtrees)
        else:
            res.append(f"{uri}@{commit}")
    return ",".join(res)


def get_gcs_name_from_src_uri(regex, content):
    """Collect candidate archive file names from SRC_URI-style assignments.

    Args:
        regex: Pattern with one capture group holding the quoted URI list.
        content: Ebuild text to search.

    Returns:
        The set of non-empty base names of every whitespace-separated token
        (after shell expansion with parse_var), excluding the `->` rename
        marker. The set is empty when nothing usable was found.
    """
    gcs_names = set()
    for src_uri_group in re.findall(regex, content, re.DOTALL):
        # split() with no separator splits on spaces, tabs and new lines alike
        # and never yields empty tokens, so no bash process is spawned for them.
        for uri in src_uri_group.split():
            if uri == "->":
                continue
            name = os.path.basename(parse_var(uri))
            # Skip tokens that expand to nothing so that an empty result stays
            # falsy for callers that fall back to another pattern.
            if name:
                gcs_names.add(name)
    return gcs_names


def parse_ebuild(ebuild):
    """Parse an ebuild file and export its variables to the environment.

    Args:
        ebuild: Path of the ebuild file, as a string.

    Returns:
        A tuple `(env_set, cros_uri, gcs_names)`: the names of the variables
        exported to os.environ, the CROS download URI(s) from find_cros_uri(),
        and the set of possible file names in the CROS GCS mirror buckets.
    """
    # Decode explicitly as UTF-8 instead of the locale default, and close the
    # file before any subprocess/network work starts.
    with open(ebuild, encoding="utf-8") as eb:
        content = eb.read()
    # remove inline comments.
    for comment in re.findall(REGEX_INLINE_COMMENT, content, re.DOTALL):
        content = content.replace(comment, "")
    env_set = parse_vars_in_ebuild(content)
    cros_uri = find_cros_uri()

    gcs_names = set()
    # Perl packages name their archive after MODULE_VERSION; skip the guess
    # when it is not defined rather than producing "<PN>-None.tar.gz".
    module_version = os.getenv("MODULE_VERSION")
    if module_version and any(
        keyword in ebuild for keyword in KEYWORDS_FOR_MODULE_VERSION
    ):
        gcs_names.add(f'{os.getenv("PN")}-{module_version}.tar.gz')
    gnome_pn = os.getenv(GNOME_PN)
    if gnome_pn:
        gcs_names.add(f'{gnome_pn}-{os.getenv("PV")}.tar.xz')

    gcs_names_src = get_gcs_name_from_src_uri(REGEX_SRC_URI, content)
    # An empty name is not a usable result; dropping it lets `SRC_URI=""`
    # followed by `SRC_URI+="..."` reach the fallback below.
    gcs_names_src.discard("")
    if not gcs_names_src:
        gcs_names_src = get_gcs_name_from_src_uri(REGEX_SRC_URI_PLUS, content)
    gcs_names.update(gcs_names_src)
    return env_set, cros_uri, gcs_names


def search_mirror_gcs(gcs_names):
    """Look for any of `gcs_names` in the CROS GCS mirror buckets.

    Args:
        gcs_names: Iterable of candidate archive file names.

    Returns:
        The output of `gsutil ls` for the first candidate that is found, or
        "" when none of the candidates exists in any bucket.
    """
    for name in gcs_names:
        # NOTE(review): presumably escapes `?` so gsutil does not treat it as
        # a wildcard - confirm.
        name = name.replace("?", "%3f")
        for bucket in CROS_GCS_MIRRORS:
            link = os.path.join(bucket, name)
            res = subprocess.run(
                ["gsutil", "ls", link], stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            if GCS_FILE_NOT_FOUND in res.stderr.decode("utf-8"):
                continue
            listing = res.stdout.decode("utf-8").rstrip()
            # Any other gsutil failure leaves stdout empty; keep trying the
            # remaining buckets and names instead of returning "" right away.
            if listing:
                return listing
    return ""


def search_src_uri():
    """Return the first reachable source URI exported from the ebuild.

    The variables in SRC_URI_VARS are tried in order. The first non-empty
    value among COMMIT_VARS, if any, is appended as `@commit`. Returns ""
    when no variable holds a reachable URI.
    """
    for uri_name in SRC_URI_VARS:
        uri = os.getenv(uri_name)
        if not uri or not is_uri_valid(uri):
            continue
        commit = next(
            (value for value in map(os.getenv, COMMIT_VARS) if value), None
        )
        return f"{uri}@{commit}" if commit else uri
    return ""


def search_go_source():
    """Build download URIs from the CROS_GO_SOURCE environment variable.

    Each entry is expected to look like `path1:path2 version`; entries that do
    not split into exactly two space-separated fields are ignored. For each
    entry the first `https://<path>` that is reachable is emitted as
    `uri@version`.

    Returns the URIs joined with ",", or "" when there are none.
    """
    found = []
    for entry in parse_var_from_env(GO_SOURCE):
        fields = entry.split(" ")
        if len(fields) != 2:
            continue
        import_paths, version = fields
        candidates = ("https://" + path for path in import_paths.split(":"))
        # Lazily probe candidates and stop at the first reachable one.
        first_valid = next((url for url in candidates if is_uri_valid(url)), None)
        if first_valid is not None:
            found.append(f"{first_valid}@{version}")
    return ",".join(found)


def search_homepage():
    """Derive a download URI from the ebuild's HOMEPAGE variable.

    A reachable homepage on chromium.googlesource.com is returned, with
    `@<CROS_WORKON_COMMIT>` appended when that commit is set. The libcxxabi
    homepage is mapped to the llvm-project mirror, with `@<LLVM_HASH>`
    appended when that hash is set. Returns "" otherwise.
    """
    homepage = os.getenv(CROS_HOMEPAGE)
    if homepage and "chromium.googlesource.com" in homepage and is_uri_valid(homepage):
        commit = os.getenv(CROS_COMMIT)
        return f"{homepage}@{commit}" if commit else homepage
    # Special case for sys-libs/libcxxabi.
    if homepage == "http://libcxxabi.llvm.org/":
        llvm_project = (
            "https://chromium.googlesource.com/external/github.com/llvm/llvm-project"
        )
        commit = os.getenv(LLVM_COMMIT)
        # Without a hash, return the bare repository instead of "...@None".
        return f"{llvm_project}@{commit}" if commit else llvm_project
    return ""


def search_download_location(gcs_names, cros_uri):
    """Return the first download location found, or "" if none.

    Sources are tried in this order and later ones are only queried when the
    earlier ones came back empty: GCS mirrors, source URI variables,
    `cros_uri`, Go sources, homepage.
    """
    return (
        search_mirror_gcs(gcs_names)
        or search_src_uri()
        or cros_uri
        or search_go_source()
        or search_homepage()
        or ""
    )


def unset_env(env_set):
    """Remove every variable named in `env_set` from the process environment.

    Variables are removed rather than set to "" so that nothing from one
    ebuild stays defined while the next one is parsed. Names that are not
    currently set are ignored.
    """
    for var in env_set:
        os.environ.pop(var, None)


def get_download_url(ebuild, repository, category, pf, license):
    """Find the download location of a package.

    Args:
        ebuild: Path of the package's ebuild file.
        repository: Name of the overlay the package comes from; packages from
            "private-overlays" are never resolved.
        category: Package category, e.g. "app-admin".
        pf: Full package name including version and optional revision.
        license: License string of the package.

    Returns:
        The download location, or "" when none could be determined.
    """
    if repository == "private-overlays":
        return ""
    # Exported so that ebuild values referring to these hosts can be expanded.
    os.environ["CROS_GIT_HOST_URL"] = CROS_GIT_HOST_URL
    os.environ["CROS_GIT_AOSP_URL"] = CROS_GIT_AOSP_URL
    p_name, gcs_names = parse_pkg_name(pf)
    gob_res = search_pkg_from_gob(repository, category, p_name, license)
    if gob_res:
        return gob_res
    env_set, cros_uri, gcs_names_ebuild = parse_ebuild(ebuild)
    try:
        gcs_names.update(gcs_names_ebuild)
        gcs_names.discard("")
        return search_download_location(gcs_names, cros_uri)
    finally:
        # Clean up the ebuild's variables even if the search raises, so they
        # cannot leak into the next package.
        unset_env(env_set)
