// blob: d204bdca61d4b0b96412d3f3f0f3375df91499d5 [file] [log] [blame]
// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package main
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"golang.org/x/crypto/ssh"
"log"
"os"
"os/exec"
"reflect"
"regexp"
"strconv"
"strings"
"text/tabwriter"
"time"
)
// labstationTelemetryCmds is the shell command line that main runs over ssh on
// every labstation. Each telemetry command is followed by a printf of a newline
// so that the individual outputs are visually separated.
const labstationTelemetryCmds = "grep guado_labstation-release /etc/lsb-release; printf \"\n\"; " +
	"uptime; printf \"\n\"; " +
	"mosys eventlog list | tail -n 10; printf \"\n\"; " +
	"pgrep --list-full update_engine; printf \"\n\";"
// warningMessage is the caveat text that main prints before it starts
// gathering DUT info.
const warningMessage = `This utility assumes many things, like that you have atest in the environment
in which it is run and that you have cros in your DNS search path. This is just
a convenience utility for firmware qual test environment health triage. Feel
free and encouraged to extend and enhance it, but in the long term, it should be
mostly obviated by monitoring and alerting.
`
const (
	// sshServerPortNumber is the TCP port on which an ssh server listens (in
	// the test image).
	sshServerPortNumber = 22

	// autotestGetBranchesGerritAPIEndpoint is the Gerrit URL prefix to which a
	// branch name of the autotest project is appended.
	autotestGetBranchesGerritAPIEndpoint = "https://chromium-review.googlesource.com/a/projects/chromiumos%2Fthird_party%2Fautotest/branches/"

	// autotestGetCommitGerritAPIEndpoint is the Gerrit URL prefix to which a
	// commit hash of the autotest project is appended.
	autotestGetCommitGerritAPIEndpoint = "https://chromium-review.googlesource.com/a/projects/chromiumos%2Fthird_party%2Fautotest/commits/"
)
// branch describes the most recent commit on one branch of the autotest
// repository, as reported by Gerrit.
type branch struct {
// name is the branch name appended to the Gerrit branches endpoint.
name string
// lastCommit is the branch's "revision" value from Gerrit, without quotes.
lastCommit string
// lastCommitTimestamp is the parsed committer date of lastCommit.
lastCommitTimestamp time.Time
}
// dut is the per-device record shown in the summary table. Every field other
// than Hostname is scraped from the output of "atest host stat" and stays empty
// when the corresponding line is not found.
type dut struct {
	// Hostname is the name the DUT was listed under by atest.
	Hostname string
	// Port is the value of the "servo_port" attribute.
	Port string
	// Labstation is the value of the "servo_host" attribute.
	Labstation string
	// Board is the value of the "board:" label.
	Board string
	// Model is the value of the "model:" label.
	Model string
	// Status is the value of the "Status:" line.
	Status string
	// LockStatus is the value of the "Locked:" line.
	LockStatus string
	// LockReason is the value of the "Lock Reason:" line.
	LockReason string
}
// sanitizeGobCurlOutput removes, in place, the ")]}'" prefix that responses
// from gob-curl currently begin with, so that the remainder can be unmarshaled
// as JSON. When the prefix is absent (including when the output is empty or
// shorter than the prefix) the buffer is left untouched and a note is logged.
// A nil output pointer is ignored.
func sanitizeGobCurlOutput(output *[]byte) {
	const magicPrefix = ")]}'"
	if output == nil {
		return
	}
	// bytes.HasPrefix is safe on inputs shorter than the prefix, unlike
	// slicing the first four bytes directly.
	if bytes.HasPrefix(*output, []byte(magicPrefix)) {
		*output = (*output)[len(magicPrefix):]
		return
	}
	log.Println("Sanitizing gob-curl output may no longer be necessary.")
}
// queryGerrit runs gob-curl against endpoint+resource and returns the top-level
// JSON object of the response, with every value left as undecoded raw JSON.
// The program is terminated via log.Fatalf if gob-curl fails or if its
// (sanitized) output is not a JSON object.
func queryGerrit(endpoint string, resource string) map[string]*json.RawMessage {
	// TODO(kmshelton): Check that gob-curl exists in the user's environment.
	// gob-curl is shelled out to, rather than using net/http, because of the
	// complexities of authenticating.
	url := endpoint + resource
	body, err := exec.Command("gob-curl", url).Output()
	if err != nil {
		log.Fatalf("gob-curl encountered: %s", err)
	}

	sanitizeGobCurlOutput(&body)

	var fields map[string]*json.RawMessage
	if err := json.Unmarshal(body, &fields); err != nil {
		log.Fatalf("json.Unmarshal encountered: %s", err)
	}
	return fields
}
// dutFieldPatterns pairs each dut field name with the pattern that extracts
// its value from the output of "atest host stat". The patterns are compiled
// once here rather than once per DUT.
var dutFieldPatterns = []struct {
	field string
	re    *regexp.Regexp
}{
	{"Port", regexp.MustCompile(`servo_port : (?P<Port>.*)`)},
	{"Labstation", regexp.MustCompile(`servo_host : (?P<Labstation>.*)`)},
	{"Board", regexp.MustCompile(`board:(?P<Board>.*)`)},
	{"Model", regexp.MustCompile(`model:(?P<Model>.*)`)},
	{"Status", regexp.MustCompile(`Status: (?P<Status>.*)`)},
	{"LockStatus", regexp.MustCompile(`Locked: (?P<LockStatus>.*)`)},
	{"LockReason", regexp.MustCompile(`Lock Reason: (?P<LockReason>.*)`)},
}

// newDut builds a dut for hostname by running "atest host stat <hostname>" and
// scraping the known fields out of its output.
//
// This is best effort: a failure to run atest is logged, and any field whose
// line cannot be found is left empty. A missing field is logged as skipped,
// except for LockReason, which is expected to be absent on unlocked DUTs.
func newDut(hostname string) dut {
	d := dut{Hostname: hostname}

	out, err := exec.Command("atest", "host", "stat", hostname).Output()
	if err != nil {
		// Keep going: whatever output was captured is still parsed below.
		log.Printf("<atest host stat %s> encountered: %s", hostname, err)
	}
	stat := string(out)

	fields := reflect.ValueOf(&d).Elem()
	for _, p := range dutFieldPatterns {
		match := p.re.FindStringSubmatch(stat)
		if len(match) != 2 {
			// The LockReason field can be empty if the DUT is not locked, so
			// only the other fields are worth reporting as skipped.
			if p.field != "LockReason" {
				log.Printf("Skipping %s on %s. This could be ok if a DUT is only partially through the deployment checklist.", p.field, hostname)
			}
			continue
		}
		fields.FieldByName(p.field).SetString(match[1])
	}
	return d
}
// sendSSHCommand runs remoteCmd on host over ssh (TCP port
// sshServerPortNumber) using config, and returns what the command wrote to
// stdout.
//
// A failure to connect or to open a session is returned as an error with empty
// output, so callers decide whether one unreachable host is fatal. If the
// command itself fails, the error from running it is returned together with
// whatever stdout was captured, and the captured stdout and stderr are logged.
func sendSSHCommand(host string, remoteCmd string, config *ssh.ClientConfig) (outs string, reterr error) {
	conn, err := ssh.Dial("tcp", host+":"+strconv.Itoa(sshServerPortNumber), config)
	if err != nil {
		return "", fmt.Errorf("failed to dial %s: %v", host, err)
	}
	defer conn.Close()

	session, err := conn.NewSession()
	if err != nil {
		return "", fmt.Errorf("failed to create session on %s: %v", host, err)
	}
	defer session.Close()

	var stdout, stderr bytes.Buffer
	session.Stdout = &stdout
	session.Stderr = &stderr
	if err := session.Run(remoteCmd); err != nil {
		// Logged here because only this function has the remote stderr.
		log.Print("Encountered an error when trying to run a remote command: " + err.Error())
		log.Printf("Failed to run \"%s\" on host: %s.\nStdout was: %s\nStderr was: %s", remoteCmd, host, stdout.String(), stderr.String())
		return stdout.String(), err
	}
	return stdout.String(), nil
}
// main prints a health triage report for one pool of DUTs. In order, it:
//
//  1. reports the date of the last commit on the autotest master and prod
//     branches (via Gerrit) and how old the prod commit is,
//  2. lists the pool's hosts with atest, keeps those whose name starts with
//     "chromeos1-", and prints a table of their atest attributes,
//  3. runs the telemetry commands on every distinct labstation over ssh, and
//  4. asks each DUT's labstation for the servo versions on the DUT's port.
//
// The pool is chosen with the -pool flag. Most failures terminate the program
// through log.Fatal; a failing servo version query is only reported.
func main() {
	// Flags are parsed before any work is done so that -help or a malformed
	// flag exits immediately rather than after the Gerrit queries.
	poolPtr := flag.String("pool", "faft-cr50", "A pool of DUTs to operate on.")
	flag.Parse()

	master := branch{name: "master"}
	prod := branch{name: "prod"}
	for _, b := range []*branch{&master, &prod} {
		branchInfo := queryGerrit(autotestGetBranchesGerritAPIEndpoint, b.name)
		rawRevision := branchInfo["revision"]
		if rawRevision == nil {
			log.Fatalf("Gerrit response for branch %s has no \"revision\" field", b.name)
		}
		// Decoding the value as a JSON string strips the surrounding quotes.
		if err := json.Unmarshal(*rawRevision, &b.lastCommit); err != nil {
			log.Fatalf("json.Unmarshal encountered: %s", err)
		}

		commitInfo := queryGerrit(autotestGetCommitGerritAPIEndpoint, b.lastCommit)
		rawCommitter := commitInfo["committer"]
		if rawCommitter == nil {
			log.Fatalf("Gerrit response for commit %s has no \"committer\" field", b.lastCommit)
		}
		var committer struct {
			Date string `json:"date"`
		}
		if err := json.Unmarshal(*rawCommitter, &committer); err != nil {
			log.Fatalf("json.Unmarshal encountered: %s", err)
		}
		timestamp, err := time.Parse("2006-01-02 15:04:05.000000000", committer.Date)
		if err != nil {
			log.Fatalf("time.Parse encountered: %s", err)
		}
		b.lastCommitTimestamp = timestamp
		fmt.Printf("The last %s commit is from %s.\n",
			b.name,
			b.lastCommitTimestamp.Format("Jan 2 at 15:04"))
	}
	hoursOld := time.Since(prod.lastCommitTimestamp).Hours()
	fmt.Printf("The prod version of autotest is about %.0f hours old.\n\n", hoursOld)

	fmt.Println(warningMessage)
	log.Print("Gathering DUT info via atest...")
	cmd := exec.Command("atest", "host", "list", "--hostnames-only", "--label=pool:"+*poolPtr)
	out, err := cmd.Output()
	if err != nil {
		log.Fatalf("<atest host list> encountered: %s", err)
	}
	// Removing hostnames that don't begin with "chromeos1-" removes those that are not in the firmware lab.
	var duts []dut
	for _, hostname := range strings.Fields(string(out)) {
		if strings.HasPrefix(hostname, "chromeos1-") {
			duts = append(duts, newDut(hostname))
		}
	}

	log.Print("Summarizing DUT info...")
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
	fmt.Fprintln(w, "Hostname\tBoard\tModel\tStatus\tLabstation\tPort\tLockStatus\tLockReason")
	for _, d := range duts {
		fmt.Fprintln(w, d.Hostname+"\t"+d.Board+"\t"+d.Model+"\t"+d.Status+"\t"+d.Labstation+"\t"+
			d.Port+"\t"+d.LockStatus+"\t"+d.LockReason+"\t")
	}
	w.Flush()

	log.Print("Gathering and displaying key telemetry for labstations.")
	// Collect each labstation once, in the order first seen among the DUTs.
	var labstations []string
	seen := make(map[string]bool)
	for _, d := range duts {
		if d.Labstation == "" || seen[d.Labstation] {
			continue
		}
		seen[d.Labstation] = true
		labstations = append(labstations, d.Labstation)
	}

	// This key is not considered secret. It's used for authenticating with ChromeOS devices
	// running test images.
	testKey := []byte(
		"\x2d\x2d\x2d\x2d\x2d\x42\x45\x47\x49\x4e\x20\x52\x53\x41\x20\x50\x52\x49\x56\x41\x54\x45\x20\x4b" +
			"\x45\x59\x2d\x2d\x2d\x2d\x2d\x0a\x4d\x49\x49\x45\x6f\x41\x49\x42\x41\x41\x4b\x43\x41\x51" +
			"\x45\x41\x76\x73\x4e\x70\x46\x64\x4b\x35\x6c\x62\x30\x47\x66\x4b\x78\x2b\x46\x67\x73\x72" +
			"\x73\x4d\x2f\x32\x2b\x61\x5a\x56\x46\x59\x58\x48\x4d\x50\x64\x76\x47\x74\x54\x7a\x36\x33" +
			"\x63\x69\x52\x68\x71\x30\x0a\x4a\x6e\x77\x37\x6e\x6c\x6e\x31\x53\x4f\x63\x48\x72\x61\x53" +
			"\x7a\x33\x2f\x69\x6d\x45\x43\x42\x67\x38\x4e\x48\x49\x4b\x56\x36\x72\x41\x2b\x42\x39\x7a" +
			"\x62\x66\x37\x70\x5a\x58\x45\x76\x32\x30\x78\x35\x55\x6c\x30\x76\x72\x63\x50\x71\x59\x57" +
			"\x43\x34\x34\x50\x54\x0a\x74\x67\x73\x67\x76\x69\x38\x73\x30\x4b\x5a\x55\x5a\x4e\x39\x33" +
			"\x59\x6c\x63\x6a\x5a\x2b\x51\x37\x42\x6a\x51\x2f\x74\x75\x77\x47\x53\x61\x4c\x57\x4c\x71" +
			"\x4a\x37\x68\x6e\x48\x41\x4c\x4d\x4a\x33\x64\x62\x45\x4d\x39\x66\x4b\x42\x48\x51\x42\x43" +
			"\x72\x47\x35\x48\x0a\x4f\x61\x57\x44\x32\x67\x74\x58\x6a\x37\x6a\x70\x30\x34\x4d\x2f\x57" +
			"\x55\x6e\x44\x44\x64\x65\x6d\x71\x2f\x4b\x4d\x67\x36\x45\x39\x6a\x63\x72\x4a\x4f\x69\x51" +
			"\x33\x39\x49\x75\x54\x70\x61\x73\x34\x68\x4c\x51\x7a\x56\x6b\x4b\x41\x4b\x53\x72\x70\x6c" +
			"\x36\x4d\x59\x0a\x32\x65\x74\x48\x79\x6f\x4e\x61\x72\x6c\x57\x68\x63\x4f\x77\x69\x74\x41" +
			"\x72\x45\x44\x77\x66\x33\x57\x67\x6e\x63\x74\x77\x4b\x73\x74\x49\x2f\x4d\x54\x4b\x42\x35" +
			"\x42\x54\x70\x4f\x32\x57\x58\x55\x4e\x55\x76\x34\x6b\x58\x7a\x41\x2b\x67\x38\x2f\x6c\x31" +
			"\x61\x6c\x0a\x6a\x49\x47\x31\x33\x76\x74\x64\x39\x41\x2f\x49\x56\x33\x4b\x46\x56\x78\x2f" +
			"\x73\x4c\x6b\x6b\x6a\x75\x5a\x37\x7a\x32\x72\x51\x58\x79\x4e\x4b\x75\x4a\x77\x49\x42\x49" +
			"\x77\x4b\x43\x41\x51\x41\x37\x39\x45\x57\x5a\x4a\x50\x68\x2f\x68\x49\x30\x43\x6e\x4a\x79" +
			"\x6e\x0a\x31\x36\x41\x45\x58\x70\x34\x54\x38\x6e\x4b\x44\x47\x32\x70\x39\x47\x70\x43\x69" +
			"\x43\x47\x6e\x71\x36\x75\x32\x44\x76\x7a\x2f\x75\x31\x70\x5a\x6b\x39\x37\x4e\x39\x54\x2b" +
			"\x78\x34\x5a\x76\x61\x30\x47\x76\x4a\x63\x31\x76\x6e\x6c\x53\x54\x37\x6f\x62\x6a\x57\x2f" +
			"\x0a\x59\x38\x2f\x45\x54\x38\x51\x65\x47\x53\x43\x54\x37\x78\x35\x50\x59\x44\x71\x69\x56" +
			"\x73\x70\x6f\x65\x6d\x72\x33\x44\x43\x79\x59\x54\x4b\x50\x6b\x41\x44\x4b\x6e\x2b\x63\x4c" +
			"\x41\x6e\x67\x44\x7a\x42\x58\x47\x48\x44\x54\x63\x66\x4e\x50\x34\x55\x36\x78\x66\x72\x0a" +
			"\x51\x63\x35\x4a\x4b\x38\x42\x73\x46\x52\x38\x6b\x41\x70\x71\x53\x73\x2f\x7a\x43\x55\x34" +
			"\x65\x71\x42\x74\x70\x32\x46\x56\x76\x50\x62\x67\x55\x4f\x76\x33\x75\x55\x72\x46\x6e\x6a" +
			"\x45\x75\x47\x73\x39\x72\x62\x31\x51\x5a\x30\x4b\x36\x6f\x30\x38\x4c\x34\x43\x71\x0a\x4e" +
			"\x2b\x65\x32\x6e\x54\x79\x73\x6a\x70\x37\x38\x62\x6c\x61\x6b\x5a\x66\x71\x6c\x75\x72\x71" +
			"\x54\x59\x36\x69\x4a\x62\x30\x49\x6d\x55\x32\x57\x33\x54\x38\x73\x56\x36\x77\x35\x47\x50" +
			"\x31\x4e\x54\x37\x65\x69\x63\x58\x4c\x4f\x33\x57\x64\x49\x52\x42\x31\x35\x61\x0a\x65\x76" +
			"\x6f\x67\x50\x65\x71\x74\x4d\x6f\x38\x47\x63\x4f\x36\x32\x77\x55\x2f\x44\x34\x55\x43\x76" +
			"\x71\x34\x47\x4e\x45\x6a\x76\x59\x4f\x76\x46\x6d\x50\x7a\x58\x48\x76\x68\x54\x78\x73\x69" +
			"\x57\x76\x35\x4b\x45\x41\x43\x74\x6c\x65\x42\x49\x45\x59\x6d\x57\x48\x41\x0a\x50\x4f\x77" +
			"\x72\x41\x6f\x47\x42\x41\x4f\x4b\x67\x4e\x52\x67\x78\x48\x4c\x37\x72\x34\x62\x4f\x6d\x70" +
			"\x4c\x51\x63\x59\x4b\x37\x78\x67\x41\x34\x39\x4f\x70\x69\x6b\x6d\x72\x65\x62\x58\x43\x51" +
			"\x6e\x5a\x2f\x6b\x5a\x33\x51\x73\x4c\x56\x76\x31\x51\x64\x4e\x4d\x48\x0a\x52\x78\x2f\x65" +
			"\x78\x37\x37\x32\x31\x67\x38\x52\x30\x6f\x57\x73\x6c\x4d\x31\x34\x6f\x74\x5a\x53\x4d\x49" +
			"\x54\x43\x44\x43\x4d\x57\x54\x59\x56\x42\x4e\x4d\x31\x62\x71\x59\x6e\x55\x65\x45\x75\x35" +
			"\x48\x61\x67\x46\x77\x78\x6a\x51\x32\x74\x4c\x75\x53\x73\x38\x45\x0a\x53\x42\x7a\x45\x72" +
			"\x39\x36\x4a\x4c\x66\x68\x77\x75\x42\x68\x44\x48\x31\x30\x73\x51\x71\x6e\x2b\x4f\x51\x47" +
			"\x31\x79\x6a\x35\x61\x63\x73\x34\x50\x74\x33\x4c\x34\x77\x6c\x59\x77\x4d\x78\x30\x76\x73" +
			"\x31\x42\x78\x41\x6f\x47\x42\x41\x4e\x64\x39\x4f\x77\x72\x6f\x0a\x35\x4f\x4e\x69\x4a\x58" +
			"\x66\x4b\x4e\x61\x4e\x59\x2f\x63\x4a\x59\x75\x4c\x52\x2b\x62\x7a\x47\x65\x79\x70\x38\x6f" +
			"\x78\x54\x6f\x78\x67\x6d\x4d\x34\x55\x75\x41\x34\x68\x68\x44\x55\x37\x70\x65\x67\x34\x73" +
			"\x64\x6f\x4b\x4a\x34\x58\x6a\x42\x39\x63\x4b\x4d\x43\x7a\x0a\x5a\x47\x55\x35\x4b\x48\x4b" +
			"\x4b\x78\x4e\x66\x39\x35\x2f\x5a\x37\x61\x79\x77\x69\x49\x4a\x45\x55\x45\x2f\x78\x50\x52" +
			"\x47\x4e\x50\x36\x74\x6e\x67\x52\x75\x6e\x65\x76\x70\x32\x51\x79\x76\x5a\x66\x34\x70\x67" +
			"\x76\x41\x43\x76\x6b\x31\x74\x6c\x39\x42\x33\x48\x48\x0a\x37\x4a\x35\x74\x59\x2f\x47\x52" +
			"\x6b\x54\x34\x73\x51\x75\x5a\x59\x70\x78\x33\x59\x6e\x62\x64\x50\x35\x59\x36\x4b\x78\x33" +
			"\x33\x42\x46\x37\x51\x58\x41\x6f\x47\x41\x56\x43\x7a\x67\x68\x56\x51\x52\x2f\x63\x56\x54" +
			"\x31\x51\x4e\x68\x76\x7a\x32\x39\x67\x73\x36\x36\x0a\x69\x50\x49\x72\x74\x51\x6e\x77\x55" +
			"\x74\x4e\x4f\x48\x41\x36\x69\x39\x68\x2b\x4d\x6e\x62\x50\x42\x4f\x59\x52\x49\x70\x69\x64" +
			"\x47\x54\x61\x71\x45\x74\x4b\x54\x54\x4b\x69\x73\x77\x37\x39\x4a\x6a\x4a\x37\x38\x58\x36" +
			"\x54\x52\x34\x61\x39\x4d\x4c\x30\x6f\x53\x67\x0a\x63\x31\x4b\x37\x31\x7a\x39\x4e\x6d\x5a" +
			"\x67\x50\x62\x4a\x55\x32\x35\x71\x4d\x4e\x38\x30\x5a\x43\x70\x68\x33\x2b\x68\x32\x66\x39" +
			"\x68\x77\x63\x36\x41\x6a\x4c\x7a\x30\x55\x35\x77\x51\x34\x61\x6c\x50\x39\x30\x39\x56\x52" +
			"\x56\x49\x58\x37\x69\x4d\x38\x70\x61\x66\x0a\x71\x35\x39\x77\x42\x69\x48\x68\x79\x44\x33" +
			"\x4a\x31\x36\x51\x41\x78\x68\x73\x43\x67\x59\x42\x75\x30\x72\x43\x6d\x68\x6d\x63\x56\x32" +
			"\x72\x51\x75\x2b\x6b\x64\x34\x6c\x43\x71\x37\x75\x4a\x6d\x42\x5a\x5a\x68\x46\x5a\x35\x74" +
			"\x6e\x79\x39\x4d\x6c\x50\x67\x69\x4b\x0a\x7a\x49\x4a\x6b\x72\x31\x72\x6b\x46\x62\x79\x49" +
			"\x66\x71\x43\x44\x7a\x79\x72\x55\x39\x69\x72\x4f\x54\x4b\x63\x2b\x69\x43\x55\x41\x32\x35" +
			"\x45\x6b\x39\x75\x6a\x6b\x48\x43\x34\x6d\x2f\x61\x54\x55\x33\x6c\x6e\x6b\x4e\x6a\x59\x70" +
			"\x2f\x4f\x46\x58\x70\x58\x46\x33\x0a\x58\x57\x5a\x4d\x59\x2b\x30\x41\x6b\x35\x75\x55\x70" +
			"\x6c\x64\x47\x38\x35\x6d\x77\x4c\x49\x76\x41\x54\x75\x33\x69\x76\x70\x62\x79\x5a\x43\x54" +
			"\x46\x59\x4d\x35\x61\x66\x53\x6d\x34\x53\x74\x6d\x61\x55\x69\x55\x35\x74\x41\x2b\x6f\x5a" +
			"\x4b\x45\x63\x47\x69\x6c\x79\x0a\x6a\x77\x4b\x42\x67\x42\x64\x46\x4c\x67\x2b\x6b\x54\x6d" +
			"\x38\x37\x37\x6c\x63\x79\x62\x51\x30\x34\x47\x31\x6b\x49\x52\x4d\x66\x35\x76\x41\x58\x63" +
			"\x43\x6f\x6e\x7a\x42\x74\x38\x72\x79\x39\x4a\x2b\x32\x69\x58\x31\x64\x64\x6c\x75\x32\x4b" +
			"\x32\x76\x4d\x72\x6f\x44\x0a\x31\x63\x50\x2f\x55\x2f\x45\x6d\x76\x6f\x43\x58\x53\x4f\x47" +
			"\x75\x65\x74\x61\x49\x34\x55\x4e\x51\x77\x45\x2f\x72\x47\x43\x74\x6b\x70\x76\x4e\x6a\x35" +
			"\x79\x34\x74\x77\x56\x4c\x68\x35\x51\x75\x66\x53\x4f\x6c\x34\x39\x56\x30\x55\x74\x30\x6d" +
			"\x77\x6a\x50\x58\x77\x0a\x48\x66\x4e\x2f\x32\x4d\x6f\x4f\x30\x37\x76\x51\x72\x6a\x67\x73" +
			"\x46\x79\x6c\x76\x72\x77\x39\x41\x37\x39\x78\x49\x74\x41\x42\x61\x71\x4b\x6e\x64\x6c\x6d" +
			"\x71\x6c\x77\x4d\x5a\x57\x63\x39\x4e\x65\x0a\x2d\x2d\x2d\x2d\x2d\x45\x4e\x44\x20\x52\x53" +
			"\x41\x20\x50\x52\x49\x56\x41\x54\x45\x20\x4b\x45\x59\x2d\x2d\x2d\x2d\x2d\x0a")
	testKeyParsed, err := ssh.ParsePrivateKey(testKey)
	if err != nil {
		log.Fatal("Unable to parse testing ssh key: ", err)
	}
	config := &ssh.ClientConfig{
		User: "root",
		Auth: []ssh.AuthMethod{
			ssh.PublicKeys(testKeyParsed),
		},
		// Host keys are not verified.
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	}

	for _, labstation := range labstations {
		fmt.Println("Operating on ", labstation)
		telemetry, err := sendSSHCommand(labstation, labstationTelemetryCmds, config)
		if err != nil {
			log.Fatalf("%s appears unhealthy. Running the telemetry commands encountered: %s", labstation, err)
		}
		fmt.Println("\n", telemetry)
	}

	log.Print("Querying servos for their versions (note this depends on the servo consoles being in a functional state): ")
	for _, d := range duts {
		// A servo can only be queried when both its host and port are known.
		if d.Labstation == "" || d.Port == "" {
			continue
		}
		servoVersionsCommand := fmt.Sprintf("dut-control -p %s servo_micro_version; dut-control -p %s servo_v4_version;", d.Port, d.Port)
		versions, err := sendSSHCommand(d.Labstation, servoVersionsCommand, config)
		if err != nil {
			fmt.Printf("%s has a servo for %s that appears unhealthy. Running the servo version commands encountered: %s\n\n",
				d.Labstation, d.Hostname, err)
			continue
		}
		fmt.Printf("%s\n%s\n", d.Hostname, versions)
	}
}