Modify ParseColumns to handle multi-worded titles

Modified ParseColumns to handle column titles that have
multiple words instead of one. Modified how ParseColumns
is called in functions that call ParseColumns: unit tests
as well as iostat's and vmstat's Run functions.

Change-Id: I71d5869a1f375fc913d8cce12929a91497470671
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/19870
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
Reviewed-by: Dexter Rivera <riverade@google.com>
Reviewed-by: Vaibhav Rustagi <vaibhavrustagi@google.com>
Tested-by: Dexter Rivera <riverade@google.com>
diff --git a/src/pkg/nodeprofiler/profiler/commands.go b/src/pkg/nodeprofiler/profiler/commands.go
index 4fbda71..54ef931 100644
--- a/src/pkg/nodeprofiler/profiler/commands.go
+++ b/src/pkg/nodeprofiler/profiler/commands.go
@@ -56,9 +56,11 @@
 	lines := strings.Split(strings.Trim(s, "\n"), "\n")
 	// ignore the first row in vmstat's output
 	lines = lines[1:]
-	titles := v.titles
+	// split first row into columns based on titles
+	allTitles := strings.Fields(lines[0])
+	wantTitles := v.titles
 	// parse output by columns
-	output, err := utils.ParseColumns(lines, titles...)
+	output, err := utils.ParseColumns(lines, allTitles, wantTitles...)
 	return output, err
 }
 
@@ -156,14 +158,16 @@
 	if err != nil {
 		return nil, fmt.Errorf("failed to run the command 'iostat': %v", err)
 	}
-
 	s := string(out)
 	lines := strings.Split(strings.Trim(s, "\n"), "\n")
-	titles := i.titles
 	// ignore the first 2 lines in iostat's output so that the first line
 	// is column titles.
 	lines = lines[2:]
+
+	// split first row into columns based on titles
+	allTitles := strings.Fields(lines[0])
+	wantTitles := i.titles
 	// parse output by rows and columns
-	output, err := utils.ParseColumns(lines, titles...)
+	output, err := utils.ParseColumns(lines, allTitles, wantTitles...)
 	return output, err
 }
diff --git a/src/pkg/nodeprofiler/utils/parsing.go b/src/pkg/nodeprofiler/utils/parsing.go
index ee32ea0..4a01a28 100644
--- a/src/pkg/nodeprofiler/utils/parsing.go
+++ b/src/pkg/nodeprofiler/utils/parsing.go
@@ -12,7 +12,19 @@
 type ParsedOutput map[string][]string
 
 // ParseColumns parses command outputs which are in a column table.
-// The function takes in an optional titles argument which specifies the
+// It takes in a slice of strings which has all the column titles of the
+// command in the correct order. This is necessary because some titles
+// contain multiple strings, thus splitting by whitespaces will split row
+// incorrectly. E.g, splitting the titles row in df's output based on
+// whitespaces will result in:
+//
+// "Filesystem  Use% Mounted on" -> ["Filesystem", "Use%", "Mounted", "on"] instead of:
+//                                  ["Filesystem", "Use%", "Mounted on"]
+// The allTitles slice is used to give each title its correct index. If the
+// titles in the slice are in wrong order, the function's output will be
+// incorrect.
+//
+// The function also takes in an optional want titles slice which specifies the
 // columns to parse. If this argument is missing, then all columns are parsed.
 //
 // Eg, ParseColumns(["r        b        swpd      buff",
@@ -24,11 +36,11 @@
 // The output needs to have titles on all its columns else the function will
 // return an error:
 //
-// Eg ParseColumns(["              total        used",
-//                  "Mem:          14520          12",
-//                  "Swap:             0           0"],
-//                  ["total", "used"])
-//                  err : "row has different number of columns from header row"
+// Eg [FAIL] ParseColumns(["              total        used",
+//                        "Mem:          14520          12",
+//                        "Swap:             0           0"],
+//                        ["total", "used"])
+//                        err : "row has different number of columns from header row"
 //
 // Some edge cases that will be parsed by this function include:
 //
@@ -44,9 +56,9 @@
 //  "kB_read/s" : {"57.39"},
 //  ...
 // }
-func ParseColumns(rows []string, titles ...string) (map[string][]string, error) {
-
+func ParseColumns(rows []string, allTitles []string, wantTitles ...string) (map[string][]string, error) {
 	parsedOutput := ParsedOutput{}
+
 	// maps each column title to its index eg columns["r"] = 0 wth vmstat.
 	columns := make(map[string]int)
 	// header saves the row that contains the titles
@@ -57,16 +69,15 @@
 		if row = strings.TrimSpace(row); row == "" {
 			continue
 		}
-		tokens := strings.Fields(row)
 
 		if i == 0 {
 			header = row
 			// if no titles were specified, use all of them
-			if len(titles) == 0 {
-				titles = tokens
+			if len(wantTitles) == 0 {
+				wantTitles = allTitles
 			}
-			// map column title to its index eg "r" : 0 with vmstat
-			for index, str := range tokens {
+			// map each column title to its index eg "r" : 0 with vmstat
+			for index, str := range allTitles {
 				columns[str] = index
 			}
 			continue
@@ -75,23 +86,24 @@
 		if row == header {
 			continue
 		}
+
+		tokens := strings.Fields(row)
 		// checks that number of columns in row is equal to number of column
 		// titles that were in header. Thus trying to parse the rows below
 		// will return an error here:
 		// "             total        used", (len 2)
 		// "Mem:         14520          12", (len 3)
 		// "Swap:            0           0",(len 3)
-		if len(columns) != len(tokens) {
+		if len(allTitles) != len(tokens) {
 			err := "row has different number of columns from header row: \n" +
 				"header row: \n %q \n " +
 				"current row: \n %q"
 			return nil, fmt.Errorf(err, header, row)
 		}
-
 		// loop over titles, get index of each title using map,
 		// use that to get the actual values, add title and its value(s) to
 		// parsedOutput.
-		for _, title := range titles {
+		for _, title := range wantTitles {
 			index, ok := columns[title]
 			if !ok {
 				return nil, fmt.Errorf("unknown column title %q", title)
@@ -110,7 +122,7 @@
 // ParseRows parses command outputs which are in a row table. It takes in a
 // string which specifies the delimiter that separates row title from values.
 // The function does not support '\n' as a delimiter for now. It takes in an
-// optional titles argument which specifies which rows to parse.
+// optional titles slice which specifies which rows to parse.
 //
 // Eg, ParseRows(["avg-cpu:  %user %nice %system  %iowait %steal  %idle"],
 //               [avg-cpu]) = map[string][]string {
@@ -142,12 +154,10 @@
 //    "cpu family" : {"6"}
 // }
 func ParseRows(lines []string, delim string, titles ...string) (map[string][]string, error) {
-
 	parsedOutput := ParsedOutput{}
 	// rows maps each row title to their value(s) which is the rest of the row
 	// after delimiter
 	rows := make(map[string][]string)
-
 	for _, line := range lines {
 		// skip empty lines
 		if line = strings.TrimSpace(line); line == "" {
@@ -171,7 +181,6 @@
 		tokens = strings.Fields(strings.Join(value, " "))
 		rows[header] = tokens
 	}
-
 	// if no titles were passed, use all the row titles.
 	if len(titles) == 0 {
 		for key := range rows {
@@ -192,7 +201,7 @@
 }
 
 // ParseRowsAndColumns parses command outputs that have row and column headers.
-// It takes in an optional titles argument which specifies the row column
+// It also takes in an optional titles slice which specifies the row column
 // combination to parse. If the titles argument is missing, then an empty map
 // is returned.
 //
@@ -219,7 +228,6 @@
 // "vdb               0.74        10.39        23.23     859900    1922916",
 // "vda               0.01         0.46         0.00      37829          0"
 func ParseRowsAndColumns(lines []string, titles ...string) (map[string][]string, error) {
-
 	parsedOutput := make(ParsedOutput)
 	// columns maps column title to its index eg columns["total"] = 0 wth free.
 	columns := make(map[string]int)
@@ -228,7 +236,6 @@
 	// Eg "Mem" : ["total", "used"] for "Mem:total", "Mem:used"
 	//    "Swap": ["total", "used"] for "Swap:total", "Swap:used"
 	titlesMap := make(map[string][]string)
-
 	// loop over titles and split them by row and column titles.
 	for _, title := range titles {
 		headers := strings.Split(strings.Trim(title, ":"), ":")
@@ -242,7 +249,6 @@
 			return nil, fmt.Errorf(err, title)
 		}
 	}
-
 	var diff int
 	// rows stores each title in rows as key and the rest of the row as value.
 	rows := make(map[string][]string)
@@ -253,7 +259,6 @@
 		if len(tokens) == 0 {
 			continue
 		}
-
 		if i == 0 {
 			// Looking at the edge case example above (iostat's output), since
 			// rows are split by whitespaces, the index of "tps" will be 1
@@ -283,7 +288,6 @@
 		//everything to the right of the row title is its value
 		rows[rHeader] = tokens[1:]
 	}
-
 	// loop over the titlesMap and use the row titles to access all
 	// the values for that row. From those values, access the columns
 	// we're interested in
@@ -304,7 +308,6 @@
 	for rowTitle, colTitles := range titlesMap {
 		values := rows[rowTitle]
 		for _, columnTitle := range colTitles {
-
 			index := columns[columnTitle]
 			value := values[index]
 			// combine the row and column title again when adding to the parsed
diff --git a/src/pkg/nodeprofiler/utils/parsing_test.go b/src/pkg/nodeprofiler/utils/parsing_test.go
index 71c8416..b2a93b2 100644
--- a/src/pkg/nodeprofiler/utils/parsing_test.go
+++ b/src/pkg/nodeprofiler/utils/parsing_test.go
@@ -8,13 +8,13 @@
 
 func TestParseColumns(t *testing.T) {
 	tests := []struct {
-		name    string
-		rows    []string
-		titles  []string
-		want    map[string][]string
-		wantErr bool
+		name       string
+		rows       []string
+		allTitles  []string
+		wantTitles []string
+		want       map[string][]string
+		wantErr    bool
 	}{
-
 		{
 			name: "vmstat's output with spaced rows",
 			rows: []string{
@@ -26,7 +26,9 @@
 				"                                                                                ",
 				"2  0      0 14827096      0  25608    0    0     0     0 5283 8037  7  3 90  0  0",
 			},
-			titles: []string{"us", "sy", "st"},
+			allTitles: []string{"r", "b", "swpd", "free", "buff", "cache", "si", "so", "bi", "bo",
+				"in", "cs", "us", "sy", "id", "wa", "st"},
+			wantTitles: []string{"us", "sy", "st"},
 			want: map[string][]string{
 				"us": {"1", "2", "7"},
 				"sy": {"0", "1", "3"},
@@ -34,6 +36,26 @@
 			},
 		},
 		{
+			name: "df's output with empty titles slice",
+			rows: []string{
+				"Filesystem      Size  Used Avail Use% Mounted on",
+				"/dev/vdb        7.5G  4.5G  2.4G  66% /",
+				"none            492K     0  492K   0% /dev",
+				"devtmpfs        7.1G     0  7.1G   0% /dev/tty",
+				"/dev/vdb        7.5G  4.5G  2.4G  66% /dev/kvm",
+			},
+			allTitles: []string{"Filesystem", "Size", "Used", "Avail",
+				"Use%", "Mounted on"},
+			want: map[string][]string{
+				"Filesystem": {"/dev/vdb", "none", "devtmpfs", "/dev/vdb"},
+				"Size":       {"7.5G", "492K", "7.1G", "7.5G"},
+				"Used":       {"4.5G", "0", "0", "4.5G"},
+				"Avail":      {"2.4G", "492K", "7.1G", "2.4G"},
+				"Use%":       {"66%", "0%", "0%", "66%"},
+				"Mounted on": {"/", "/dev", "/dev/tty", "/dev/kvm"},
+			},
+		},
+		{
 			name: "empty slice",
 			rows: []string{},
 			want: map[string][]string{},
@@ -44,6 +66,7 @@
 				"r  b   swpd   free   buff",
 				"5  0      0 14827096      0",
 			},
+			allTitles: []string{"r", "b", "swpd", "free", "buff"},
 			want: map[string][]string{
 				"r":    {"5"},
 				"b":    {"0"},
@@ -65,7 +88,9 @@
 				"																																		 ",
 				"Device            r/s     w/s     rkB/s     wkB/s   rrqm/s   wrqm/s  %rrqm  %wrqm r_await w_await aqu-sz rareq-sz wareq-sz  svctm  %util",
 			},
-			titles: []string{"%util"},
+			allTitles: []string{"Device", "r/s", "w/s", "rkB/s", "wkB/s", "rrqm/s", "wrqm/s", "%rrqm", "%wrqm", "%r_await", "w_await", "aqu-sz", "rareq-sz",
+				"wareq-sz", "svctm", "%util"},
+			wantTitles: []string{"%util"},
 			want: map[string][]string{
 				"%util": {"5.62", "0.00"},
 			},
@@ -77,8 +102,9 @@
 				"Mem:          14520          13       14481           0          25       14506",
 				"Swap:             0           0           0",
 			},
-			titles:  []string{"total"},
-			wantErr: true,
+			allTitles:  []string{"total", "used", "free", "shared", "buff/cache", "available"},
+			wantTitles: []string{"total"},
+			wantErr:    true,
 		},
 		{
 			name: "unknown titles",
@@ -86,22 +112,34 @@
 				"r  b   swpd    free   buff   cache",
 				"5  0      0 14827096     0   25608",
 			},
-			titles:  []string{"r", "b", "used"},
-			wantErr: true,
+			allTitles:  []string{"r", "b", "swpd", "free", "buff", "cache"},
+			wantTitles: []string{"r", "b", "used"},
+			wantErr:    true,
+		},
+		{
+			name: "incomplete slice of all titles",
+			rows: []string{
+				"Filesystem      Size  Used Avail Use% Mounted on",
+				"/dev/vdb        7.5G  4.5G  2.4G  67% /",
+				"none            492K     0  492K   0% /dev",
+				"devtmpfs        7.1G     0  7.1G   0% /dev/tty",
+				"/dev/vdb        7.5G  4.5G  2.4G  67% /dev/kvm",
+			},
+			allTitles: []string{"Filesystem", "Size", "Used", "Avail", "Use%"},
+			wantErr:   true,
 		},
 	}
-
 	for _, test := range tests {
-		got, err := ParseColumns(test.rows, test.titles...)
+		got, err := ParseColumns(test.rows, test.allTitles, test.wantTitles...)
 		if gotErr := err != nil; gotErr != test.wantErr {
-			t.Fatalf("ParseColumns(%v, %v) err %v, wantErr: %t", test.rows, test.titles, err, test.wantErr)
+			t.Fatalf("ParseColumns(%v, %v, %v) err %v, wantErr: %t", test.rows, test.allTitles, test.wantTitles, err, test.wantErr)
 		}
 		if diff := cmp.Diff(test.want, got); diff != "" {
-			t.Errorf("Ran ParseColumns(%v, %v), but got mismatch between got and want (+got, -want): \n diff %s", test.rows, test.titles, diff)
+			t.Errorf("Ran ParseColumns(%v, %v, %v), but got mismatch between got and want (+got, -want): \n diff %s", test.rows,
+				test.allTitles, test.wantTitles, diff)
 		}
 	}
 }
-
 func TestParseRows(t *testing.T) {
 	tests := []struct {
 		name    string
@@ -187,7 +225,6 @@
 			wantErr: true,
 		},
 	}
-
 	for _, test := range tests {
 		got, err := ParseRows(test.rows, test.delim, test.titles...)
 		if gotErr := err != nil; gotErr != test.wantErr {
@@ -198,7 +235,6 @@
 		}
 	}
 }
-
 func TestParseRowsAndColumns(t *testing.T) {
 	tests := []struct {
 		name    string
@@ -249,7 +285,6 @@
 			wantErr: true,
 		},
 	}
-
 	for _, test := range tests {
 		got, err := ParseRowsAndColumns(test.rows, test.titles...)
 		if gotErr := err != nil; gotErr != test.wantErr {
@@ -259,5 +294,4 @@
 			t.Errorf("Ran ParseRowsAndColumns(%v, %v), but got mismatch between got and want (+got, -want): \n diff %s", test.rows, test.titles, diff)
 		}
 	}
-
 }