| // Copyright 2020 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // This package generates a changelog based on the commit history between |
| // two build numbers. The changelog consists of two outputs - the commits |
| // added to the target build that aren't present in the source build, and the |
| // commits in the source build that aren't present in the target build. This |
| // package uses concurrency to improve performance. |
| // |
| // This package uses Gitiles to request information from a Git on Borg instance. |
| // To generate a changelog, the package first retrieves the manifest files for |
| // the two requested builds using the provided manifest GoB instance and repository. |
| // The package then parses the XML files and retrieves the committish and instance |
| // URL. A request is sent on a separate thread for each repository, asking for a list |
| // of commits that occurred between the source committish and the target committish. |
| // Finally, the resulting git.Commit objects are converted to Commit objects, and |
| // consolidated into a mapping of repository path -> []*Commit. |
| |
| package changelog |
| |
| import ( |
| "errors" |
| "net/http" |
| "regexp" |
| "strings" |
| |
| "cos.googlesource.com/cos/tools/src/pkg/utils" |
| "github.com/beevik/etree" |
| log "github.com/sirupsen/logrus" |
| gitilesApi "go.chromium.org/luci/common/api/gitiles" |
| gitilesProto "go.chromium.org/luci/common/proto/gitiles" |
| ) |
| |
// imageBuildRe matches image names of the form "cos-[dev-|beta-|stable-|rc-]<digits>-<build>".
// Capture group 2 holds the trailing dash-separated build portion, which
// resolveImageName converts into a dotted build number.
var imageBuildRe = regexp.MustCompile("^cos-(dev-|beta-|stable-|rc-)?\\d+-([\\d-]+)$")
| |
// repo describes one <project> entry of a manifest file, as built by repoMap.
type repo struct {
	// Repo is the project's "name" attribute and Path is its "path"
	// attribute (empty when the project declares no path).
	Repo, Path string
	// InstanceURL is the Git on Borg instance to query from.
	InstanceURL string
	// Committish points to the last commit for a build on this repo.
	// Acceptable values:
	// - A commit SHA
	// - A ref, ex. "refs/heads/branch"
	// - A ref defined as n-th parent of R in the form "R-n",
	//   ex. "master-2" or "deadbeef-1".
	// Source: https://pkg.go.dev/go.chromium.org/luci/common/proto/gitiles?tab=doc#LogRequest
	Committish string
}
| |
| type commitsRequest struct { |
| Client gitilesProto.GitilesClient |
| InstanceURL string |
| Path string |
| Repo string |
| Committish string |
| Ancestor string |
| QuerySize int |
| OutputChan chan commitsResult |
| } |
| |
| type commitsResult struct { |
| Commits []*Commit |
| InstanceURL string |
| Repo string |
| Path string |
| HasMoreCommits bool |
| Err utils.ChangelogError |
| } |
| |
| type additionsResult struct { |
| Additions map[string]*RepoLog |
| Err utils.ChangelogError |
| } |
| |
| // RepoLog contains a changelist for a particular repository |
| type RepoLog struct { |
| Commits []*Commit |
| InstanceURL string |
| Repo string |
| SourceSHA string |
| TargetSHA string |
| HasMoreCommits bool |
| } |
| |
| // resolveImageName returns the build number associated with an image name. |
| // If the string is not an image name, it returns the input string. |
| func resolveImageName(imageName string) string { |
| build := imageBuildRe.FindStringSubmatch(imageName) |
| if len(build) < 2 { |
| return imageName |
| } |
| buildNum := strings.Replace(build[2], "-", ".", 3) |
| log.Debugf("resolveImageName: image name %s was resolved to build number %s", imageName, buildNum) |
| return buildNum |
| } |
| |
// limitPageSize caps a page size at the requested size.
// It returns requestedSize when that is smaller than pageSize, and
// pageSize otherwise. A requestedSize of -1 means "no limit", so pageSize
// is returned as-is.
func limitPageSize(pageSize, requestedSize int) int {
	unlimited := requestedSize == -1
	if !unlimited && requestedSize < pageSize {
		return requestedSize
	}
	return pageSize
}
| |
| func gitilesClient(httpClient *http.Client, remoteURL string) (gitilesProto.GitilesClient, utils.ChangelogError) { |
| log.Debugf("Creating Gitiles client for remote url %s\n", remoteURL) |
| cl, err := gitilesApi.NewRESTClient(httpClient, remoteURL, true) |
| if err != nil { |
| log.Errorf("gitilesClient: failed to create client for remote url %s", remoteURL) |
| return nil, utils.InternalServerError |
| } |
| return cl, nil |
| } |
| |
| func createGitilesClients(clients map[string]gitilesProto.GitilesClient, httpClient *http.Client, repoMap map[string]*repo) utils.ChangelogError { |
| log.Debug("Creating additional Gerrit clients for manifest file if not already created") |
| for _, repoData := range repoMap { |
| remoteURL := repoData.InstanceURL |
| if _, ok := clients[remoteURL]; ok { |
| continue |
| } |
| client, err := gitilesClient(httpClient, remoteURL) |
| if err != nil { |
| return err |
| } |
| clients[remoteURL] = client |
| } |
| return nil |
| } |
| |
| // repoMap generates a mapping of repository ID to instance URL and committish. |
| // This eliminates the need to track remote names and allows lookup |
| // of source committish when generating changelog. |
| func repoMap(manifest string) (map[string]*repo, error) { |
| log.Debug("Mapping repository to instance URL and committish") |
| if manifest == "" { |
| log.Error("repoMap: manifest file is empty") |
| return nil, errors.New("manifest file is empty") |
| } |
| doc := etree.NewDocument() |
| if err := doc.ReadFromString(manifest); err != nil { |
| log.Debug("repoMap: error parsing manifest xml:\n%w", err) |
| return nil, errors.New("could not parse XML for manifest file associated with build") |
| } |
| root := doc.SelectElement("manifest") |
| |
| // Parse each <remote fetch=X name=Y> tag in the manifest xml file. |
| // Extract the "fetch" and "name" attributes from each remote tag, and map the name to the fetch URL. |
| remoteMap := make(map[string]string) |
| for _, remote := range root.SelectElements("remote") { |
| url := strings.Replace(remote.SelectAttr("fetch").Value, "https://", "", 1) |
| remoteMap[remote.SelectAttr("name").Value] = url |
| } |
| |
| // Parse each <project name=X remote=Y revision=Z> tag in the manifest xml file. |
| // Extract the "name", "remote", and "revision" attributes from each project tag. |
| // Some projects do not have a "remote" attribute. |
| // If this is the case, they should use the default remoteURL. |
| if root.SelectElement("default").SelectAttr("remote") != nil { |
| remoteMap[""] = remoteMap[root.SelectElement("default").SelectAttr("remote").Value] |
| } |
| repos := make(map[string]*repo) |
| for _, project := range root.SelectElements("project") { |
| name, path := project.SelectAttr("name").Value, project.SelectAttrValue("path", "") |
| repos[path] = &repo{ |
| Repo: name, |
| Path: path, |
| InstanceURL: remoteMap[project.SelectAttrValue("remote", "")], |
| Committish: project.SelectAttr("revision").Value, |
| } |
| } |
| return repos, nil |
| } |
| |
| // mappedManifest retrieves a Manifest file from GoB and unmarshals XML. |
| // Returns a mapping of repository ID to repository data. |
| func mappedManifest(client gitilesProto.GitilesClient, repo string, buildInput, buildNum string) (map[string]*repo, utils.ChangelogError) { |
| log.Debugf("Retrieving manifest file for build %s\n", buildNum) |
| response, err := utils.DownloadManifest(client, repo, buildNum) |
| if err != nil { |
| log.Errorf("mappedManifest: error downloading manifest file from repo %s for build %s:\n%v", repo, buildNum, err) |
| httpCode := utils.GitilesErrCode(err) |
| if httpCode == "403" { |
| return nil, utils.ForbiddenError |
| } else if httpCode == "404" { |
| return nil, utils.BuildNotFound(buildInput) |
| } |
| return nil, utils.InternalServerError |
| } |
| mappedManifest, err := repoMap(response.Contents) |
| if err != nil { |
| log.Errorf("mappedManifest: error retrieving mapped manifest file from repo %s for build %s:\n%v", repo, buildNum, err) |
| httpCode := utils.GitilesErrCode(err) |
| if httpCode == "404" { |
| return nil, utils.BuildNotFound(buildInput) |
| } |
| return nil, utils.InternalServerError |
| } |
| return mappedManifest, nil |
| } |
| |
| // commits get all commits that occur between committish and ancestor for a specific repo. |
| func commits(req commitsRequest) { |
| log.Debugf("Fetching changelog for repo: %s on committish %s\n", req.Repo, req.Committish) |
| commits, hasMoreCommits, err := utils.Commits(req.Client, req.Repo, req.Committish, req.Ancestor, req.QuerySize) |
| if err != nil { |
| if utils.GitilesErrCode(err) == "404" { |
| req.OutputChan <- commitsResult{ |
| InstanceURL: req.InstanceURL, |
| Path: req.Path, |
| Repo: req.Repo, |
| } |
| } else { |
| log.Errorf("commits: error retrieving commit changelog on repo %s from commit %s to commit %s:\n%v", req.Repo, req.Committish, req.Ancestor, err) |
| req.OutputChan <- commitsResult{Err: utils.InternalServerError} |
| } |
| return |
| } |
| if commits == nil { |
| log.Info(req.Repo, req.Committish, req.Ancestor) |
| } |
| parsedCommits, err := ParseGitCommitLog(commits) |
| if err != nil { |
| log.Errorf("commits: error parsing Gitiles commits response\n%v", err) |
| req.OutputChan <- commitsResult{Err: utils.InternalServerError} |
| return |
| } |
| req.OutputChan <- commitsResult{ |
| Commits: parsedCommits, |
| InstanceURL: req.InstanceURL, |
| Path: req.Path, |
| Repo: req.Repo, |
| HasMoreCommits: hasMoreCommits, |
| } |
| } |
| |
| // additions retrieves all commits that occured between 2 parsed manifest files for each repo. |
| // Returns a map of repo name -> list of commits. |
| func additions(clients map[string]gitilesProto.GitilesClient, sourceRepos map[string]*repo, targetRepos map[string]*repo, querySize int, outputChan chan additionsResult) { |
| log.Debug("Retrieving commit additions") |
| repoCommits := make(map[string]*RepoLog) |
| commitsChan := make(chan commitsResult, len(targetRepos)) |
| for repoID, targetRepoInfo := range targetRepos { |
| cl := clients[targetRepoInfo.InstanceURL] |
| // If the source Manifest file does not contain a target repo, |
| // count every commit since target repo creation as an addition |
| ancestorCommittish := "" |
| if sourceRepoInfo, ok := sourceRepos[repoID]; ok { |
| ancestorCommittish = sourceRepoInfo.Committish |
| } |
| commitsReq := commitsRequest{ |
| Client: cl, |
| Path: targetRepoInfo.Path, |
| InstanceURL: targetRepoInfo.InstanceURL, |
| Repo: targetRepoInfo.Repo, |
| Committish: targetRepoInfo.Committish, |
| Ancestor: ancestorCommittish, |
| QuerySize: querySize, |
| OutputChan: commitsChan, |
| } |
| go commits(commitsReq) |
| } |
| for i := 0; i < len(targetRepos); i++ { |
| res := <-commitsChan |
| if res.Err != nil { |
| outputChan <- additionsResult{Err: res.Err} |
| return |
| } |
| var sourceSHA string |
| if sourceData, ok := sourceRepos[res.Path]; ok { |
| sourceSHA = sourceData.Committish |
| } |
| if len(res.Commits) > 0 { |
| repoCommits[res.Path] = &RepoLog{ |
| Commits: res.Commits, |
| HasMoreCommits: res.HasMoreCommits, |
| InstanceURL: res.InstanceURL, |
| Repo: res.Repo, |
| SourceSHA: sourceSHA, |
| TargetSHA: targetRepos[res.Path].Committish, |
| } |
| } |
| } |
| outputChan <- additionsResult{Additions: repoCommits} |
| return |
| } |
| |
| // Changelog generates a changelog between 2 build numbers |
| // |
| // httpClient is a authorized http.Client object with Gerrit scope. |
| // |
| // sourceBuildNum and targetBuildNum should be build numbers. It should match |
| // a tag that links directly to snapshot.xml |
| // Ex. For /refs/tags/15049.0.0, the argument should be 15049.0.0 |
| // |
| // host should be the GoB instance that Manifest files are hosted in |
| // ex. "cos.googlesource.com" |
| // |
| // repo should be the repository that build manifest files |
| // are located, ex. "cos/manifest-snapshots" |
| // |
| // querySize should be the number of commits that should be included in each |
| // repository changelog. Specify as -1 to get all commits |
| // |
| // Outputs two changelogs |
| // The first changelog contains new commits that were added to the target |
| // build starting from the source build number |
| // |
| // The second changelog contains all commits that are present in the source build |
| // but not present in the target build |
| func Changelog(httpClient *http.Client, source, target, host, repo, croslandURL string, querySize int) (map[string]*RepoLog, map[string]*RepoLog, utils.ChangelogError) { |
| if httpClient == nil { |
| log.Error("httpClient is nil") |
| return nil, nil, utils.InternalServerError |
| } |
| sourceBuildNum, targetBuildNum := resolveImageName(source), resolveImageName(target) |
| log.Infof("Retrieving changelog between %s and %s\n", sourceBuildNum, targetBuildNum) |
| clients := make(map[string]gitilesProto.GitilesClient) |
| |
| // Since the manifest file is always in the cos instance, add cos client |
| // so that client knows what URL to use |
| manifestClient, err := gitilesClient(httpClient, host) |
| if err != nil { |
| return nil, nil, err |
| } |
| sourceRepos, sourceErr := mappedManifest(manifestClient, repo, source, sourceBuildNum) |
| targetRepos, targetErr := mappedManifest(manifestClient, repo, target, targetBuildNum) |
| if sourceErr != nil && sourceErr.HTTPCode() == "404" && targetErr != nil && targetErr.HTTPCode() == "404" { |
| return nil, nil, utils.BothBuildsNotFound(croslandURL, source, target, sourceBuildNum, targetBuildNum) |
| } else if sourceErr != nil { |
| return nil, nil, sourceErr |
| } else if targetErr != nil { |
| return nil, nil, targetErr |
| } |
| |
| clients[host] = manifestClient |
| err = createGitilesClients(clients, httpClient, sourceRepos) |
| if err != nil { |
| return nil, nil, err |
| } |
| err = createGitilesClients(clients, httpClient, targetRepos) |
| if err != nil { |
| return nil, nil, err |
| } |
| |
| addChan := make(chan additionsResult, 1) |
| missChan := make(chan additionsResult, 1) |
| go additions(clients, sourceRepos, targetRepos, querySize, addChan) |
| go additions(clients, targetRepos, sourceRepos, querySize, missChan) |
| missRes := <-missChan |
| if missRes.Err != nil { |
| return nil, nil, missRes.Err |
| } |
| addRes := <-addChan |
| if addRes.Err != nil { |
| return nil, nil, addRes.Err |
| } |
| |
| return addRes.Additions, missRes.Additions, nil |
| } |