// Package changelog generates a changelog based on the commit history between
// two build numbers. The changelog consists of two outputs - the commits
// added to the target build that aren't present in the source build, and the
// commits in the source build that aren't present in the target build. This
// package uses concurrency to improve performance.
// This package uses Gitiles to request information from a Git on Borg instance.
// To generate a changelog, the package first retrieves the manifest files for
// the two requested builds using the provided manifest GoB instance and repository.
// The package then parses the XML files and retrieves the committish and instance
// URL. A request is sent on a separate goroutine for each repository, asking for a list
// of commits that occurred between the source committish and the target committish.
// Finally, the resulting git.Commit objects are converted to Commit objects, and
// consolidated into a mapping of repository path -> []*Commit.
package changelog
import (
log ""
gitilesApi ""
gitilesProto ""
var (
// imageBuildRe matches image names such as "cos-rc-85-13310-1034-0".
// Capture group 1 is the optional release-track prefix ("dev-", "beta-",
// "stable-" or "rc-"); capture group 2 is the trailing run of digits and
// dashes that resolveImageName converts into a dotted build number.
imageBuildRe = regexp.MustCompile("^cos-(dev-|beta-|stable-|rc-)?\\d+-([\\d-]+)$")
// repo describes one <project> entry of a build manifest as produced by
// repoMap: which repository it is, where it is checked out, which Git on
// Borg instance hosts it, and the revision pinned for the build.
type repo struct {
// Repo is the value of the project's "name" attribute.
Repo string
// Path is the value of the project's "path" attribute ("" when absent).
// repoMap also uses it as the key of the returned map.
Path string
// The Git on Borg instance to query from.
InstanceURL string
// A value that points to the last commit for a build on a given repo.
// It is read from the project's "revision" attribute.
// Acceptable values:
// - A commit SHA
// - A ref, ex. "refs/heads/branch"
// - A ref defined as n-th parent of R in the form "R-n".
// ex. "master-2" or "deadbeef-1".
Committish string
// commitsRequest bundles the inputs for a single call to commits, which
// fetches the commit log of one repository and reports it on OutputChan.
type commitsRequest struct {
// Client is the Gitiles client handed to utils.Commits.
Client gitilesProto.GitilesClient
// InstanceURL is copied into the result so the receiver knows which
// instance the repository lives on.
InstanceURL string
// Path is copied into the result; additions uses it to key its output.
Path string
// Repo is the repository passed to utils.Commits.
Repo string
// Committish is the revision whose history is requested.
Committish string
// Ancestor is the revision passed to utils.Commits as the ancestor.
// additions leaves it "" when the other manifest lacks the repository.
Ancestor string
// QuerySize is forwarded unchanged to utils.Commits.
QuerySize int
// OutputChan receives the commitsResult for this request.
OutputChan chan commitsResult
// commitsResult is the message commits sends on a request's OutputChan.
type commitsResult struct {
// Commits holds the output of ParseGitCommitLog. It is left nil when the
// lookup returned a 404 or when Err is set.
Commits []*Commit
// InstanceURL, Repo and Path are copied from the originating
// commitsRequest. They are not populated on results that carry an Err.
InstanceURL string
Repo string
Path string
// HasMoreCommits is the flag returned by utils.Commits.
// NOTE(review): presumably true when the log was cut off at the query
// size - confirm against utils.Commits.
HasMoreCommits bool
// Err is non-nil when retrieving or parsing the commit log failed.
Err utils.ChangelogError
// additionsResult is the message additions sends on its output channel.
type additionsResult struct {
// Additions maps a repository path to its RepoLog. Only repositories
// with at least one commit are present. Unset when Err is non-nil.
Additions map[string]*RepoLog
// Err is the first error reported by any per-repository commits lookup.
Err utils.ChangelogError
// RepoLog contains a changelist for a particular repository
type RepoLog struct {
// Commits is the list of commits found for the repository.
Commits []*Commit
// InstanceURL is the Git on Borg instance hosting the repository.
InstanceURL string
// Repo is the repository name taken from the manifest.
Repo string
// SourceSHA is the committish recorded for this repository in the
// manifest being compared against; "" when that manifest lacks it.
SourceSHA string
// TargetSHA is the committish recorded for this repository in the
// manifest whose commits are listed.
TargetSHA string
// HasMoreCommits is the flag reported by utils.Commits for this log.
// NOTE(review): presumably true when more commits exist than were
// returned - confirm against utils.Commits.
HasMoreCommits bool
// resolveImageName returns the build number associated with an image name.
// If the string is not an image name, it returns the input string.
func resolveImageName(imageName string) string {
build := imageBuildRe.FindStringSubmatch(imageName)
if len(build) < 2 {
return imageName
buildNum := strings.Replace(build[2], "-", ".", 3)
log.Debugf("resolveImageName: image name %s was resolved to build number %s", imageName, buildNum)
return buildNum
// limitPageSize caps the size of the next page request: it returns the
// smaller of pageSize (which grows exponentially between requests) and the
// remaining requestedSize. A requestedSize of -1 means "no limit", in which
// case pageSize is returned as is.
func limitPageSize(pageSize, requestedSize int) int {
	unlimited := requestedSize == -1
	if !unlimited && requestedSize < pageSize {
		return requestedSize
	}
	return pageSize
}
func gitilesClient(httpClient *http.Client, remoteURL string) (gitilesProto.GitilesClient, utils.ChangelogError) {
log.Debugf("Creating Gitiles client for remote url %s\n", remoteURL)
cl, err := gitilesApi.NewRESTClient(httpClient, remoteURL, true)
if err != nil {
log.Errorf("gitilesClient: failed to create client for remote url %s", remoteURL)
return nil, utils.InternalServerError
return cl, nil
func createGitilesClients(clients map[string]gitilesProto.GitilesClient, httpClient *http.Client, repoMap map[string]*repo) utils.ChangelogError {
log.Debug("Creating additional Gerrit clients for manifest file if not already created")
for _, repoData := range repoMap {
remoteURL := repoData.InstanceURL
if _, ok := clients[remoteURL]; ok {
client, err := gitilesClient(httpClient, remoteURL)
if err != nil {
return err
clients[remoteURL] = client
return nil
// repoMap generates a mapping of repository ID to instance URL and committish.
// This eliminates the need to track remote names and allows lookup
// of source committish when generating changelog.
func repoMap(manifest string) (map[string]*repo, error) {
log.Debug("Mapping repository to instance URL and committish")
if manifest == "" {
log.Error("repoMap: manifest file is empty")
return nil, errors.New("manifest file is empty")
doc := etree.NewDocument()
if err := doc.ReadFromString(manifest); err != nil {
log.Debug("repoMap: error parsing manifest xml:\n%w", err)
return nil, errors.New("could not parse XML for manifest file associated with build")
root := doc.SelectElement("manifest")
// Parse each <remote fetch=X name=Y> tag in the manifest xml file.
// Extract the "fetch" and "name" attributes from each remote tag, and map the name to the fetch URL.
remoteMap := make(map[string]string)
for _, remote := range root.SelectElements("remote") {
url := strings.Replace(remote.SelectAttr("fetch").Value, "https://", "", 1)
remoteMap[remote.SelectAttr("name").Value] = url
// Parse each <project name=X remote=Y revision=Z> tag in the manifest xml file.
// Extract the "name", "remote", and "revision" attributes from each project tag.
// Some projects do not have a "remote" attribute.
// If this is the case, they should use the default remoteURL.
if root.SelectElement("default").SelectAttr("remote") != nil {
remoteMap[""] = remoteMap[root.SelectElement("default").SelectAttr("remote").Value]
repos := make(map[string]*repo)
for _, project := range root.SelectElements("project") {
name, path := project.SelectAttr("name").Value, project.SelectAttrValue("path", "")
repos[path] = &repo{
Repo: name,
Path: path,
InstanceURL: remoteMap[project.SelectAttrValue("remote", "")],
Committish: project.SelectAttr("revision").Value,
return repos, nil
// mappedManifest retrieves a Manifest file from GoB and unmarshals XML.
// Returns a mapping of repository ID to repository data.
func mappedManifest(client gitilesProto.GitilesClient, repo string, buildInput, buildNum string) (map[string]*repo, utils.ChangelogError) {
log.Debugf("Retrieving manifest file for build %s\n", buildNum)
response, err := utils.DownloadManifest(client, repo, buildNum)
if err != nil {
log.Errorf("mappedManifest: error downloading manifest file from repo %s for build %s:\n%v", repo, buildNum, err)
httpCode := utils.GitilesErrCode(err)
if httpCode == "403" {
return nil, utils.ForbiddenError
} else if httpCode == "404" {
return nil, utils.BuildNotFound(buildInput)
return nil, utils.InternalServerError
mappedManifest, err := repoMap(response.Contents)
if err != nil {
log.Errorf("mappedManifest: error retrieving mapped manifest file from repo %s for build %s:\n%v", repo, buildNum, err)
httpCode := utils.GitilesErrCode(err)
if httpCode == "404" {
return nil, utils.BuildNotFound(buildInput)
return nil, utils.InternalServerError
return mappedManifest, nil
// commits get all commits that occur between committish and ancestor for a specific repo.
func commits(req commitsRequest) {
log.Debugf("Fetching changelog for repo: %s on committish %s\n", req.Repo, req.Committish)
commits, hasMoreCommits, err := utils.Commits(req.Client, req.Repo, req.Committish, req.Ancestor, req.QuerySize)
if err != nil {
if utils.GitilesErrCode(err) == "404" {
req.OutputChan <- commitsResult{
InstanceURL: req.InstanceURL,
Path: req.Path,
Repo: req.Repo,
} else {
log.Errorf("commits: error retrieving commit changelog on repo %s from commit %s to commit %s:\n%v", req.Repo, req.Committish, req.Ancestor, err)
req.OutputChan <- commitsResult{Err: utils.InternalServerError}
if commits == nil {
log.Info(req.Repo, req.Committish, req.Ancestor)
parsedCommits, err := ParseGitCommitLog(commits)
if err != nil {
log.Errorf("commits: error parsing Gitiles commits response\n%v", err)
req.OutputChan <- commitsResult{Err: utils.InternalServerError}
req.OutputChan <- commitsResult{
Commits: parsedCommits,
InstanceURL: req.InstanceURL,
Path: req.Path,
Repo: req.Repo,
HasMoreCommits: hasMoreCommits,
// additions retrieves all commits that occured between 2 parsed manifest files for each repo.
// Returns a map of repo name -> list of commits.
func additions(clients map[string]gitilesProto.GitilesClient, sourceRepos map[string]*repo, targetRepos map[string]*repo, querySize int, outputChan chan additionsResult) {
log.Debug("Retrieving commit additions")
repoCommits := make(map[string]*RepoLog)
commitsChan := make(chan commitsResult, len(targetRepos))
for repoID, targetRepoInfo := range targetRepos {
cl := clients[targetRepoInfo.InstanceURL]
// If the source Manifest file does not contain a target repo,
// count every commit since target repo creation as an addition
ancestorCommittish := ""
if sourceRepoInfo, ok := sourceRepos[repoID]; ok {
ancestorCommittish = sourceRepoInfo.Committish
commitsReq := commitsRequest{
Client: cl,
Path: targetRepoInfo.Path,
InstanceURL: targetRepoInfo.InstanceURL,
Repo: targetRepoInfo.Repo,
Committish: targetRepoInfo.Committish,
Ancestor: ancestorCommittish,
QuerySize: querySize,
OutputChan: commitsChan,
go commits(commitsReq)
for i := 0; i < len(targetRepos); i++ {
res := <-commitsChan
if res.Err != nil {
outputChan <- additionsResult{Err: res.Err}
var sourceSHA string
if sourceData, ok := sourceRepos[res.Path]; ok {
sourceSHA = sourceData.Committish
if len(res.Commits) > 0 {
repoCommits[res.Path] = &RepoLog{
Commits: res.Commits,
HasMoreCommits: res.HasMoreCommits,
InstanceURL: res.InstanceURL,
Repo: res.Repo,
SourceSHA: sourceSHA,
TargetSHA: targetRepos[res.Path].Committish,
outputChan <- additionsResult{Additions: repoCommits}
// Changelog generates a changelog between 2 build numbers
// httpClient is a authorized http.Client object with Gerrit scope.
// sourceBuildNum and targetBuildNum should be build numbers. It should match
// a tag that links directly to snapshot.xml
// Ex. For /refs/tags/15049.0.0, the argument should be 15049.0.0
// host should be the GoB instance that Manifest files are hosted in
// ex. ""
// repo should be the repository that build manifest files
// are located, ex. "cos/manifest-snapshots"
// querySize should be the number of commits that should be included in each
// repository changelog. Specify as -1 to get all commits
// Outputs two changelogs
// The first changelog contains new commits that were added to the target
// build starting from the source build number
// The second changelog contains all commits that are present in the source build
// but not present in the target build
func Changelog(httpClient *http.Client, source, target, host, repo, croslandURL string, querySize int) (map[string]*RepoLog, map[string]*RepoLog, utils.ChangelogError) {
if httpClient == nil {
log.Error("httpClient is nil")
return nil, nil, utils.InternalServerError
sourceBuildNum, targetBuildNum := resolveImageName(source), resolveImageName(target)
log.Infof("Retrieving changelog between %s and %s\n", sourceBuildNum, targetBuildNum)
clients := make(map[string]gitilesProto.GitilesClient)
// Since the manifest file is always in the cos instance, add cos client
// so that client knows what URL to use
manifestClient, err := gitilesClient(httpClient, host)
if err != nil {
return nil, nil, err
sourceRepos, sourceErr := mappedManifest(manifestClient, repo, source, sourceBuildNum)
targetRepos, targetErr := mappedManifest(manifestClient, repo, target, targetBuildNum)
if sourceErr != nil && sourceErr.HTTPCode() == "404" && targetErr != nil && targetErr.HTTPCode() == "404" {
return nil, nil, utils.BothBuildsNotFound(croslandURL, source, target, sourceBuildNum, targetBuildNum)
} else if sourceErr != nil {
return nil, nil, sourceErr
} else if targetErr != nil {
return nil, nil, targetErr
clients[host] = manifestClient
err = createGitilesClients(clients, httpClient, sourceRepos)
if err != nil {
return nil, nil, err
err = createGitilesClients(clients, httpClient, targetRepos)
if err != nil {
return nil, nil, err
addChan := make(chan additionsResult, 1)
missChan := make(chan additionsResult, 1)
go additions(clients, sourceRepos, targetRepos, querySize, addChan)
go additions(clients, targetRepos, sourceRepos, querySize, missChan)
missRes := <-missChan
if missRes.Err != nil {
return nil, nil, missRes.Err
addRes := <-addChan
if addRes.Err != nil {
return nil, nil, addRes.Err
return addRes.Additions, missRes.Additions, nil