| #!/usr/bin/env python3 |
| # Copyright 2023 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import os |
| import re |
| import sys |
| from typing import Optional, List |
| from urllib.parse import urlparse, urlunparse |
| from itertools import filterfalse |
| |
| _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) |
| # The repo's root directory. |
| _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", "..")) |
| |
| # Add the repo's root directory for clearer imports. |
| sys.path.insert(0, _ROOT_DIR) |
| |
| import metadata.fields.field_types as field_types |
| import metadata.fields.util as util |
| import metadata.validation_result as vr |
| |
| _PATTERN_URL_CANONICAL_REPO = re.compile( |
| r"^This is the canonical (public )?repo(sitory)?\.?$", re.IGNORECASE) |
| |
| _SUPPORTED_SCHEMES = { |
| 'http', |
| 'https', |
| 'git', |
| 'ftp', |
| } |
| |
| # URLs can't contain whitespaces. Treat them as delimiters so we can handle cases where URL field contains one URL per line (without comma delimiter). |
| _PATTERN_URL_DELIMITER = re.compile("{}|{}".format( |
| r'\s+', field_types.MetadataField.VALUE_DELIMITER)) |
| |
| |
| def _split_urls(value: str) -> List[str]: |
| """Split url field value into individual URLs.""" |
| urls = _PATTERN_URL_DELIMITER.split(value) |
| return list(filter(lambda x: len(x) > 0, map(str.strip, urls))) |
| |
| |
| def _url_canonicalize(url: str) -> str: |
| """Return the canonicalized URL (e.g. make scheme lower case).""" |
| return urlunparse(urlparse(url)) |
| |
| |
| def _url_is_canonical(url: str) -> bool: |
| return url == _url_canonicalize(url) |
| |
| |
| def _url_is_valid(url: str) -> bool: |
| """Checks whether the given `url` is acceptable: |
| * url is can be parsed without an error. |
| * url uses a supported scheme / protocol. |
| """ |
| try: |
| u = urlparse(url) |
| except: |
| return False |
| |
| if u.scheme not in _SUPPORTED_SCHEMES: |
| return False |
| |
| return True |
| |
| class URLField(field_types.MetadataField): |
| """Custom field for the package URL(s).""" |
| def __init__(self): |
| super().__init__(name="URL") |
| |
| def repo_is_canonical(self, value: str): |
| """Returns if `raw_value` indicates this repository is the canonical repository.""" |
| return util.matches(_PATTERN_URL_CANONICAL_REPO, value.strip()) |
| |
| def validate(self, value: str) -> Optional[vr.ValidationResult]: |
| """Checks the given value has acceptable URL values only. |
| |
| Note: this field supports multiple values. |
| """ |
| if self.repo_is_canonical(value): |
| return None |
| |
| urls = _split_urls(value) |
| if not urls: |
| return vr.ValidationError(reason=f"{self._name} must be provided.") |
| |
| invalid_values = list(filterfalse(_url_is_valid, urls)) |
| |
| if invalid_values: |
| return vr.ValidationError( |
| reason=f"{self._name} is invalid.", |
| additional=[ |
| "URLs must use a protocol scheme in " |
| "[http, https, ftp, git].", |
| f"Separate URLs using a '{self.VALUE_DELIMITER}'.", |
| f"Invalid values: {util.quoted(invalid_values)}.", |
| ]) |
| |
| non_canon_values = list(filterfalse(_url_is_canonical, urls)) |
| if non_canon_values: |
| canon_values = list(map(_url_canonicalize, non_canon_values)) |
| return vr.ValidationWarning( |
| reason=f"{self._name} is contains non-canonical URLs.", |
| additional=[ |
| "URLs should be canonical and well-formed." |
| f"Non canonical values: {util.quoted(non_canon_values)}.", |
| f"Canonicalized URLs should be: {util.quoted(canon_values)}." |
| ]) |
| |
| return None |
| |
| def narrow_type(self, value) -> Optional[List[str]]: |
| if not value: |
| return None |
| |
| if self.repo_is_canonical(value): |
| return None |
| |
| # Filter out invalid URLs, and canonicalize the URLs. |
| return list( |
| map(_url_canonicalize, filter(_url_is_valid, _split_urls(value)))) |