| #!/usr/bin/env python3 |
| # |
| # Copyright 2024 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| # This script can be used to find commits which are present in the COS kernel |
| # branch, but not the upstream kernel branch. |
| # The script takes the new kernel version and previous kernel version as |
| # arguments and does the following: |
| # |
| # 1. Gets all of the commits which are specific to COS by listing the commits |
| # which are in the `cos-$PREVIOUS_KERNEL_VERSION` branch but not in the |
| # `upstream-$KERNEL_VERSION` branch. |
| # 2. For each commit: |
| # 1. Checks if the commit is present upstream by: |
| # 1. Checking if a commit with the exact same contents is present |
| # upstream. |
| # 2. Checking if there are upstream commit hashes in the commit |
| # message. |
| # 3. Checking for an upstream commit with the exact same subject line. |
| # 2. Outputs the commit hash, the upstream commit hash (if present), |
| # whether an exact content match was found, and the commit subject. |
| # |
| # The output will be a tab-separated table in commit order. |
| # |
| # Usage: |
| # If you were upgrading from kernel version 6.1 to 6.6, you could run this |
| # script as follows: |
| # |
| # python3 get_cos_specific_kernel_commits.py -p 6.1 -n 6.6 > commits.tsv |
| |
| import argparse |
| import dataclasses |
| import logging |
| import re |
| from subprocess import PIPE, Popen, run |
| import sys |
| from typing import Mapping, Optional |
| |
# Splits a `<hash> <subject>` line at the first space into (hash, subject).
COMMIT_SUBJECT_RE = re.compile(r'(.*?) (.*)')
# Matches references such as "[ Upstream commit <hash> ]". The hash must be a
# standalone run of 7-40 hex digits so that hex-looking letters inside ordinary
# words (e.g. the "d" in "Upstream commit id") are not mistaken for a hash.
UPSTREAM_COMMIT_PATTERN_1 = re.compile(
    r'Upstream commit.*?\b([a-f0-9]{7,40})\b'
)
# Matches references tagged as "UPSTREAM(<hash>)". Written as a raw string so
# that `\(` is a regex escape rather than an invalid string escape.
UPSTREAM_COMMIT_PATTERN_2 = re.compile(r'UPSTREAM\(([a-f0-9]+)\)')
| |
| |
@dataclasses.dataclass
class Commit:
    """Information about a single git commit."""

    # Hash of the commit.
    _hash: str
    # Subject of the commit.
    subject: str
    # Commit message; searched for references to upstream commits.
    message: str
    # Whether a commit with exactly the same content was found upstream.
    has_exact_content_match: bool

    def get_upstream_commit_hashes(
        self,
        subject_to_commit_map: Mapping[str, list[str]],
    ) -> list[str]:
        """Finds the hashes of upstream commits that "match" this commit.

        The checks are tried in order and the first one that succeeds wins:

        1. The message matches UPSTREAM_COMMIT_PATTERN_1.
        2. The message matches UPSTREAM_COMMIT_PATTERN_2.
        3. The subject is a key of `subject_to_commit_map`.

        Args:
            subject_to_commit_map: A mapping from commit subjects to a list of
                hashes for commits with that subject.

        Returns:
            A list of all commit hashes which match this one. The list is
            empty when nothing matched.
        """
        logging.info(
            f'Getting upstream hashes for commit {self._hash} ({self.subject})'
        )

        first_style_ref = UPSTREAM_COMMIT_PATTERN_1.search(self.message)
        if first_style_ref is not None:
            logging.info('Matched upstream pattern 1')
            return [first_style_ref.group(1)]

        second_style_ref = UPSTREAM_COMMIT_PATTERN_2.search(self.message)
        if second_style_ref is not None:
            logging.info('Matched upstream pattern 2')
            return [second_style_ref.group(1)]

        # With no subject map to consult there is nothing left to try.
        if not subject_to_commit_map:
            return []

        logging.info(f'Checking subject: {self.subject}')
        return subject_to_commit_map.get(self.subject, [])
| |
| |
def get_cos_specific_commits(
    previous_kernel_version: str, new_kernel_version: str, origin: str
) -> list[Commit]:
    """Lists commits on the previous COS branch missing from the new upstream.

    The comparison is delegated to `_git_cherry` using three refs under
    `origin`: `upstream-<new>` as the upstream, `cos-<previous>` as the head
    and `upstream-<previous>` as the limit.

    For example, this would return all of the commits which were added to
    cos/cos-6.1 starting from cos/upstream-6.1, but which have not been added
    to cos/upstream-6.6:
        get_cos_specific_commits('6.1', '6.6', 'cos')

    Args:
        previous_kernel_version: Kernel version being upgraded from.
        new_kernel_version: Kernel version being upgraded to.
        origin: Prefix placed in front of every branch name.

    Returns:
        The commits reported by `_git_cherry` for those three refs.
    """
    return _git_cherry(
        upstream=f'{origin}/upstream-{new_kernel_version}',
        head=f'{origin}/cos-{previous_kernel_version}',
        limit=f'{origin}/upstream-{previous_kernel_version}',
    )
| |
| |
def _git_cherry(upstream: str, head: str, limit: str) -> list[Commit]:
    """Runs `git cherry -v upstream head limit` and parses its output.

    See https://git-scm.com/docs/git-cherry for more information.

    Args:
        upstream: Branch searched for equivalent commits.
        head: Branch whose commits are reported.
        limit: Commits up to and including this one are not reported.

    Returns:
        One Commit per parsed output line, in the order git printed them.
        `has_exact_content_match` is True for lines whose first field is `-`.
        The list may be empty or partial if git exits with an error; that
        case is logged.
    """
    commits = []

    with Popen(
        ['git', 'cherry', '-v', upstream, head, limit], stdout=PIPE
    ) as cherry:
        for raw_line in cherry.stdout:
            # Replace undecodable bytes instead of skipping the line, so that
            # a commit with an oddly-encoded subject is still reported.
            line = raw_line.decode('utf-8', errors='replace').strip()

            fields = line.split(maxsplit=2)
            if len(fields) < 2:
                if line:
                    logging.warning(
                        f'Skipping unparseable git cherry line: {line!r}'
                    )
                continue

            marker, _hash = fields[0], fields[1]
            # A commit with an empty subject produces only two fields.
            subject = fields[2] if len(fields) == 3 else ''

            commits.append(
                Commit(
                    _hash=_hash,
                    subject=subject,
                    message=_get_commit_message(_hash),
                    has_exact_content_match=marker == '-',
                )
            )

    # Leaving the `with` block waits for git, so returncode is set here.
    if cherry.returncode != 0:
        logging.error(
            f'git cherry exited with status {cherry.returncode}; the commit'
            ' list may be empty or incomplete'
        )

    return commits
| |
| |
def _get_commit_message(commit_hash: str) -> str:
    """Returns the output of `git log --format=%B -n 1 <commit_hash>`.

    Args:
        commit_hash: The commit whose message is requested.

    Returns:
        The command's stdout decoded as UTF-8. Bytes that are not valid UTF-8
        are replaced rather than raising, so that a single oddly-encoded
        commit message does not abort the whole run.
    """
    result = run(
        ['git', 'log', '--format=%B', '-n', '1', commit_hash], stdout=PIPE
    )
    return result.stdout.decode('utf-8', errors='replace')
| |
| |
def get_subject_to_commit_map(
    branch: str, max_commits: Optional[int] = None
) -> dict[str, list[str]]:
    """Builds a lookup from commit subject to commit hashes for a branch.

    Reads the output of `git log --format='%H %s' <branch>` line by line.

    Args:
        branch: The branch for which the mapping will be collected.
        max_commits: The maximum number of log lines to read. If None, every
            line is read. A smaller value saves a significant amount of time
            for large repositories.

    Returns:
        A dict from commit subjects to lists of hashes of commits with that
        subject. Most subjects will only have one corresponding hash, but
        sometimes two unrelated commits may have the same subject.
    """
    hashes_by_subject = {}
    with Popen(
        ['git', 'log', '--format=%H %s', branch], stdout=PIPE
    ) as git_log:
        for index, raw_line in enumerate(git_log.stdout):
            limit_reached = max_commits is not None and index >= max_commits
            if limit_reached:
                break

            try:
                text = raw_line.decode('utf-8').strip()
            except Exception as err:
                logging.warning(err)
                continue

            parsed = COMMIT_SUBJECT_RE.match(text)
            if parsed is None:
                continue

            commit_hash, subject = parsed.groups()
            # Several commits can share a subject, so keep every hash.
            hashes_by_subject.setdefault(subject, []).append(commit_hash)

    logging.info(f'mapped {len(hashes_by_subject)} commit subjects')
    return hashes_by_subject
| |
| |
def main() -> None:
    """Parses the command line and writes the COS-specific commit table.

    The table is tab-separated and written to stdout, one row per
    (commit, upstream hash) pair in the order the commits were found.
    Progress is logged through `logging` at INFO level.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        prog='get-cos-specific-kernel-commits',
        description=(
            'Finds commits which are in the COS kernel branch, but not the'
            ' upstream kernel branch'
        ),
    )
    parser.add_argument('-n', '--new-kernel-version', type=str, required=True)
    parser.add_argument(
        '-p', '--previous-kernel-version', type=str, required=True
    )
    # `const` keeps a bare `-o` (flag given without a value) on the default
    # origin; without it argparse would store None and the branch names would
    # become 'None/...'.
    parser.add_argument(
        '-o', '--origin', default='cos', const='cos', type=str, nargs='?'
    )
    args = parser.parse_args()

    logging.info('Finding COS-specific commits...')
    commits = get_cos_specific_commits(
        args.previous_kernel_version, args.new_kernel_version, args.origin
    )
    logging.info(f'Done. Found {len(commits)} commits.')

    logging.info('Getting map from commit subjects to commit hashes...')
    # Get the subject mapping for the 100,000 most recent commits. Hopefully
    # we're not more behind than that at any point.
    new_upstream = f'{args.origin}/upstream-{args.new_kernel_version}'
    subject_to_commit_map = get_subject_to_commit_map(new_upstream, 100000)
    logging.info('Done.')

    # Find the upstream commits for each commit, if present, and output the
    # results as a tab-separated table.
    sys.stdout.write('Hash\tUpstream Hash\tHas Exact Content Match\tSubject\n')
    for commit in commits:
        upstream_hashes = commit.get_upstream_commit_hashes(
            subject_to_commit_map
        )
        if len(upstream_hashes) > 1:
            logging.warning(
                f'Found multiple upstream commits for commit {commit._hash}:'
                f' {commit.subject}'
            )
        if not upstream_hashes:
            # We output the string 'None' if we didn't find any matching
            # commits.
            upstream_hashes = ['None']
        for upstream_hash in upstream_hashes:
            row = (
                commit._hash,
                upstream_hash,
                str(commit.has_exact_content_match),
                commit.subject,
            )
            sys.stdout.write('\t'.join(row) + '\n')


if __name__ == '__main__':
    main()