Modify which outputs are parsed by ParseRowsAndColumns This CL modifies ParseRowsAndColumns so that it parses only output that have empty strings on row 0 column 0. If an output has both row and column headers but an non-empty string on row 0 column 0, it should be parsed by ParseColumns instead Change-Id: Ic9e1ce9e5fd4d9d860afd87b61fd5b481b1cc815 Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/20931 Reviewed-by: Dexter Rivera <riverade@google.com> Tested-by: Dexter Rivera <riverade@google.com> Cloud-Build: Dexter Rivera <riverade@google.com>

commit: 4b30dcbe3b5b080da0dfa07648e6079006577a05 [log] [tgz]
author: Harriet Muutu <hmuutu@google.com> Tue Aug 10 17:27:12 2021 -0400
committer: Harriet Muutu <hmuutu@google.com> Wed Aug 11 21:57:33 2021 +0000
tree: b7e1f5061439be3e63a308a32e80b37fe51b14ad
parent: 9c9db4f8fdda89e17ef84d25d39de7965c505f8b [diff]
diff --git a/src/pkg/nodeprofiler/utils/parsing.go b/src/pkg/nodeprofiler/utils/parsing.go
index 4a01a28..ab5ab71 100644
--- a/src/pkg/nodeprofiler/utils/parsing.go
+++ b/src/pkg/nodeprofiler/utils/parsing.go

@@ -201,7 +201,20 @@
 }
 
 // ParseRowsAndColumns parses command outputs that have row and column headers.
-// It also takes in an optional titles slice which specifies the row column
+// Specifically, it parses command outputs that have an empty string on row 0
+// column 0. For example, the output of free as seen below:
+//            total        used        free
+// Mem:       14518          12       14479
+// Swap:          0           0           0
+//
+// If the command output has a non-empty string on row 0 column 0 as with
+// iostat below, the function will throw an error since this is an output that
+// should be parsed by columns instead
+// "Device             tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
+// "vdb               0.74        10.39        23.23     859900    1922916",
+// "vda               0.01         0.46         0.00      37829          0"
+//
+// The function takes in an optional titles slice which specifies the row column
 // combination to parse. If the titles argument is missing, then an empty map
 // is returned.
 //
@@ -219,14 +232,6 @@
 //                                  "Swap:      0      0       0        "],
 //                                  ["Mem+used", "Swap+total"])
 //                                  err : "title string not well-formatted"
-//
-// Here are some edge cases parsed by the function:
-//
-// Rows with non-empty strings on row 0 column 0 E.g., with iostat (The default is
-// an empty string on row 0 column 0):
-// "Device             tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
-// "vdb               0.74        10.39        23.23     859900    1922916",
-// "vda               0.01         0.46         0.00      37829          0"
 func ParseRowsAndColumns(lines []string, titles ...string) (map[string][]string, error) {
 	parsedOutput := make(ParsedOutput)
 	// columns maps column title to its index eg columns["total"] = 0 wth free.
@@ -249,7 +254,6 @@
 			return nil, fmt.Errorf(err, title)
 		}
 	}
-	var diff int
 	// rows stores each title in rows as key and the rest of the row as value.
 	rows := make(map[string][]string)
 	// loop over each row, mapping its title to the rest of the row (which is
@@ -260,27 +264,25 @@
 			continue
 		}
 		if i == 0 {
-			// Looking at the edge case example above (iostat's output), since
-			// rows are split by whitespaces, the index of "tps" will be 1
-			// after split. When the second row is split, and divided into row
-			// title and values, the following will result:
-			// "vdb" : {"0.74", "10.39", "23.23", "859900", "1922916"}
+			// This function parses command outputs that have an empty string on
+			// row 0 column 0. If the output has a non-empty string on row 0
+			// column 0 like iostat(see above), then the function throws an error
+			// here since this output should be parsed by columns. Hence the
+			// need for splitting by regular expression.
 			//
-			// Index of column titles will be used to access values from slice
-			// above. Index of "tps" = 1 and index 1 of slice above is 10.39
-			// (which is incorrect). The correct value is in index 0 (which we
-			// we would have gotten if col 0 row 0 was empty). To deal with this,
-			// if column 0 of row 0 is a non-empty string, then 1 is subtracted
-			// from the actual index of the rest of the colums in row 0. Thus the
-			// need for the diff variable.
+			// The expression splits the line into characters. If the first character
+			// is not an empty string, then the output had a non-empty string on row 0
+			// col 0 and an error is returned.
 			exp := regexp.MustCompile(`\s*`)
 			chars := exp.Split(line, -1)
 			if chars[0] != "" {
-				diff = -1
+				err := "command output has non-empty string on row 0 column 0: " +
+					"%q"
+				return nil, fmt.Errorf(err, line)
 			}
 			// map header name to its index
 			for index, str := range tokens {
-				columns[str] = index + diff
+				columns[str] = index
 			}
 			continue
 		}

diff --git a/src/pkg/nodeprofiler/utils/parsing_test.go b/src/pkg/nodeprofiler/utils/parsing_test.go
index b2a93b2..d6b238b 100644
--- a/src/pkg/nodeprofiler/utils/parsing_test.go
+++ b/src/pkg/nodeprofiler/utils/parsing_test.go

@@ -259,7 +259,7 @@
 			},
 		},
 		{
-			name: "iostat with spaced rows",
+			name: "iostat",
 			rows: []string{
 				"Device             tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
 				"                                                                      ",
@@ -267,17 +267,13 @@
 				"                                                                       ",
 				"vda               0.00         0.20         0.00      37845          0",
 			},
-			titles: []string{"vdb:tps", "vda:kB_read", "vdb:kB_wrtn"},
-			want: map[string][]string{
-				"vdb:tps":     {"1.27"},
-				"vdb:kB_wrtn": {"8072028"},
-				"vda:kB_read": {"37845"},
-			},
+			titles:  []string{"vdb:tps", "vda:kB_read", "vdb:kB_wrtn"},
+			wantErr: true,
 		},
 		{
 			name: "wrongly formatted titles",
 			rows: []string{
-				"Device             tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
+				"                  tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
 				"vdb               1.27         3.79        41.80     732408    8072028",
 				"vda               0.00         0.20         0.00      37845          0",
 			},
commit	4b30dcbe3b5b080da0dfa07648e6079006577a05	[log] [tgz]
author	Harriet Muutu <hmuutu@google.com>	Tue Aug 10 17:27:12 2021 -0400
committer	Harriet Muutu <hmuutu@google.com>	Wed Aug 11 21:57:33 2021 +0000
tree	b7e1f5061439be3e63a308a32e80b37fe51b14ad
parent	9c9db4f8fdda89e17ef84d25d39de7965c505f8b [diff]