src/pkg/nodeprofiler/utils/parsing.go - cos/tools - Git at Google

 package utils

 import (
 	"fmt"
 	"regexp"
 	"strings"
 )

 // ParsedOutput holds the parsed output of shell commands whose output
 // takes the form of a table. Each key is a title (a column title, a row
 // title, or a "row:column" combination, depending on the parser that filled
 // the map) and each value is the list of strings parsed for that title.
 type ParsedOutput map[string][]string

 // ParseColumns parses command outputs which are in a column table: the first
 // non-empty row holds the column titles and every following row holds one
 // value per column.
 //
 // The optional titles argument specifies the columns to parse. If it is
 // missing, all columns found in the header row are parsed. The returned map
 // has one entry per parsed title, holding that column's values in row order.
 //
 // Eg,
 //
 //	ParseColumns([]string{
 //		"r        b        swpd      buff",
 //		"10        0    14831128        0"},
 //		"r", "b")
 //	// => map[string][]string{"r": {"10"}, "b": {"0"}}
 //
 // The output needs to have titles on all its columns, else the function
 // returns an error:
 //
 //	ParseColumns([]string{
 //		"              total        used",
 //		"Mem:          14520          12",
 //		"Swap:             0           0"},
 //		"total", "used")
 //	// => err: "row has different number of columns from header row: ..."
 //
 // A requested title that is not in the header row makes the function return
 // an "unknown column title" error when the first data row is processed.
 //
 // Some edge cases that are handled by this function include:
 //
 // Blank rows (including blank rows before the header), which are skipped.
 //
 // Rows that repeat the header, eg with the output of iostat:
 //
 //	[]string{
 //		"Device      tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
 //		"vdb        2.39        57.39        69.83     855576    1041132",
 //		"                                                               ",
 //		"Device      tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
 //		"                                                               ",
 //		"Device      tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn"}
 //	// => map[string][]string{"tps": {"2.39"}, "kB_read/s": {"57.39"}, ...}
 func ParseColumns(rows []string, titles ...string) (map[string][]string, error) {
 	parsedOutput := make(map[string][]string)
 	// columns maps each column title to its index, eg columns["r"] = 0 with
 	// vmstat.
 	columns := make(map[string]int)
 	// header saves the (trimmed) row that contains the titles. headerSeen
 	// records whether that row has been found yet: the header is the first
 	// non-empty row, which is not necessarily rows[0].
 	var header string
 	headerSeen := false

 	for _, row := range rows {
 		// skip empty lines
 		row = strings.TrimSpace(row)
 		if row == "" {
 			continue
 		}
 		tokens := strings.Fields(row)

 		if !headerSeen {
 			headerSeen = true
 			header = row
 			// if no titles were specified, use all of them
 			if len(titles) == 0 {
 				titles = tokens
 			}
 			// map column title to its index eg "r" : 0 with vmstat
 			for index, str := range tokens {
 				columns[str] = index
 			}
 			continue
 		}
 		// a row that repeats the header carries no values, ignore it
 		if row == header {
 			continue
 		}
 		// The number of values in a row must equal the number of column
 		// titles in the header. Thus trying to parse the rows below returns
 		// an error here:
 		// "             total        used", (len 2)
 		// "Mem:         14520          12", (len 3)
 		// "Swap:            0           0", (len 3)
 		if len(columns) != len(tokens) {
 			return nil, fmt.Errorf("row has different number of columns from header row: \n"+
 				"header row: \n %q \n "+
 				"current row: \n %q", header, row)
 		}

 		// For each requested title, look up its column index and append the
 		// value found at that index in the current row. Eg, if "r" is the
 		// title and the lines to parse were below, then index = 0 and
 		// tokens[index] = "5":
 		// "r   b   swpd ..."
 		// "5   0      0 ..."
 		for _, title := range titles {
 			index, ok := columns[title]
 			if !ok {
 				return nil, fmt.Errorf("unknown column title %q", title)
 			}
 			parsedOutput[title] = append(parsedOutput[title], tokens[index])
 		}
 	}
 	return parsedOutput, nil
 }

 // ParseRows parses command outputs which are in a row table, i.e. every
 // non-empty line has the form "<row title><delim><values>".
 //
 // delim is the string that separates the row title from its values. It must
 // not be empty, and '\n' is not supported as a delimiter for now. Every
 // occurrence of delim in a line is treated as a separator, so a value that
 // itself contains delim is split into several values. The optional titles
 // argument specifies which rows to parse; if it is missing, all rows are
 // parsed. Row titles are matched exactly as they appear in the output
 // (surrounding white space removed).
 //
 // Eg,
 //
 //	ParseRows([]string{"avg-cpu:  %user %nice %system  %iowait %steal  %idle"},
 //		":", "avg-cpu")
 //	// => map[string][]string{
 //	//	"avg-cpu": {"%user", "%nice", "%system", "%iowait", "%steal", "%idle"}}
 //
 // If the wrong delimiter is passed in, the function returns an error:
 //
 //	ParseRows([]string{
 //		"Device  tps   kB_read/s   kB_wrtn/s",
 //		"vdb    1.13       19.48       33.61",
 //		"vda    0.02        0.86        0.00"},
 //		":", "vda")
 //	// => err: `failed to split "Device ..." by the delimiter ":"`
 //
 // A requested title that matches no row yields an "unknown row title" error.
 //
 // Some edge cases parsed by this function include:
 //
 // Rows whose delimiter has white space around it. For example,
 //
 //	[]string{
 //		"processor:     7",
 //		"cpu family:    6"}
 //
 // OR
 //
 //	[]string{
 //		"processor      : 7",
 //		"cpu family     : 6"}
 //
 // both give
 //
 //	map[string][]string{"processor": {"7"}, "cpu family": {"6"}}
 //
 // If the same row title appears more than once, the last occurrence wins.
 func ParseRows(lines []string, delim string, titles ...string) (map[string][]string, error) {
 	// An empty delimiter would make strings.Split cut the line into single
 	// characters, producing meaningless titles and values.
 	if delim == "" {
 		return nil, fmt.Errorf("delimiter must not be empty")
 	}

 	// rows maps each row title to its value(s), which is the rest of the row
 	// after the delimiter.
 	rows := make(map[string][]string)
 	for _, line := range lines {
 		// skip empty lines
 		line = strings.TrimSpace(line)
 		if line == "" {
 			continue
 		}
 		tokens := strings.Split(line, delim)
 		// if row did not split, return an error.
 		if len(tokens) == 1 {
 			return nil, fmt.Errorf("failed to split %q by the delimiter %q", line, delim)
 		}
 		// removes white space from title
 		header := strings.TrimSpace(tokens[0])
 		// Remove any extra white space in the values: join everything after
 		// the title back into one string and split it on white space. For
 		// example, "Architecture:     x86_64" is first split into
 		// ["Architecture", "     x86_64"], and the value part then becomes
 		// ["x86_64"].
 		rows[header] = strings.Fields(strings.Join(tokens[1:], " "))
 	}

 	// if no titles were passed, every parsed row is part of the result.
 	if len(titles) == 0 {
 		return rows, nil
 	}
 	// otherwise pick only the requested rows.
 	parsedOutput := make(map[string][]string, len(titles))
 	for _, title := range titles {
 		values, ok := rows[title]
 		if !ok {
 			return nil, fmt.Errorf("unknown row title %q", title)
 		}
 		parsedOutput[title] = values
 	}
 	return parsedOutput, nil
 }

 // headerLeadingSpace matches a line whose first character is white space. It
 // is used on the column header row to tell whether row 0 column 0 of the
 // table is empty.
 var headerLeadingSpace = regexp.MustCompile(`^\s`)

 // ParseRowsAndColumns parses command outputs that have row and column
 // headers. The first non-empty line is the column header row. In every other
 // non-empty line the first field is the row title (leading and trailing ':'
 // removed) and the remaining fields are the values of that row. If a row
 // title appears more than once, the last occurrence wins.
 //
 // The optional titles argument specifies the row column combinations to
 // parse, each in the format "row:column". If the titles argument is missing,
 // an empty map is returned. The returned map is keyed by "row:column".
 //
 // Eg,
 //
 //	ParseRowsAndColumns([]string{
 //		"       total   used   free   shared",
 //		"Mem:   14520     12   14482        0",
 //		"Swap:      0      0       0        "},
 //		"Mem:used", "Swap:total")
 //	// => map[string][]string{"Mem:used": {"12"}, "Swap:total": {"0"}}
 //
 // The titles should be in the format "row:column". Else, an error is
 // returned:
 //
 //	ParseRowsAndColumns([]string{
 //		"       total   used   free   shared",
 //		"Mem:   14520     12   14482        0",
 //		"Swap:      0      0       0        "},
 //		"Mem+used", "Swap+total")
 //	// => err: "title string not well-formed: ..."
 //
 // An error is also returned when a requested row or column title does not
 // exist in the output, or when the requested row has no value under the
 // requested column (eg "Swap:shared" above).
 //
 // Here are some edge cases parsed by the function:
 //
 // Rows with a non-empty string on row 0 column 0, eg with iostat (the
 // default is an empty string on row 0 column 0):
 //
 //	"Device             tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
 //	"vdb               0.74        10.39        23.23     859900    1922916",
 //	"vda               0.01         0.46         0.00      37829          0"
 func ParseRowsAndColumns(lines []string, titles ...string) (map[string][]string, error) {
 	// cell is one requested row/column combination.
 	type cell struct{ row, column string }

 	// Split every title into its row and column part up front, so that a
 	// malformed title is reported before any line is parsed.
 	cells := make([]cell, 0, len(titles))
 	for _, title := range titles {
 		headers := strings.Split(strings.Trim(title, ":"), ":")
 		if len(headers) != 2 {
 			return nil, fmt.Errorf("title string not well-formed: each title should "+
 				"be in the form <row>:<column>, where row is the name "+
 				"of the row header and column is the name of the "+
 				"column header but got %q", title)
 		}
 		cells = append(cells, cell{row: headers[0], column: headers[1]})
 	}

 	// columns maps a column title to its index in a row's values, eg
 	// columns["total"] = 0 with free.
 	columns := make(map[string]int)
 	// rows maps each row title to the rest of the row (its values).
 	rows := make(map[string][]string)
 	// The header is the first non-empty line, not necessarily lines[0].
 	headerSeen := false
 	for _, line := range lines {
 		tokens := strings.Fields(line)
 		if len(tokens) == 0 {
 			continue
 		}

 		if !headerSeen {
 			headerSeen = true
 			// Row values never include the row title, so column indexes
 			// must be relative to the first value. With free's output the
 			// header starts with white space (row 0 column 0 is empty) and
 			// the index of each header token is already correct. With
 			// iostat's output the header starts with "Device", which sits
 			// above the row titles; every header token is then shifted
 			// left by one ("Device" itself ends up at -1 and can never be
 			// selected as a value).
 			offset := 0
 			if !headerLeadingSpace.MatchString(line) {
 				offset = -1
 			}
 			for index, str := range tokens {
 				columns[str] = index + offset
 			}
 			continue
 		}
 		rHeader := strings.Trim(tokens[0], ":")
 		// everything to the right of the row title is its value
 		rows[rHeader] = tokens[1:]
 	}

 	// Resolve each requested cell: fetch the values of its row, then use the
 	// index of its column to pick the value. Eg with free's output below:
 	// "              total        used        free",
 	// "Mem:          14520          13       14482",
 	// "Swap:             0           0           0"
 	// "Mem:total" reads rows["Mem"] = {"14520", "13", "14482"} at
 	// columns["total"] = 0, which gives "14520".
 	parsedOutput := make(map[string][]string, len(cells))
 	for _, c := range cells {
 		values, ok := rows[c.row]
 		if !ok {
 			return nil, fmt.Errorf("unknown row title %q", c.row)
 		}
 		index, ok := columns[c.column]
 		if !ok {
 			return nil, fmt.Errorf("unknown column title %q", c.column)
 		}
 		// Guards against the row-title column (index -1) and against rows
 		// that are shorter than the header.
 		if index < 0 || index >= len(values) {
 			return nil, fmt.Errorf("row %q has no value for column %q", c.row, c.column)
 		}
 		// combine the row and column title again when adding to the parsed
 		// output map.
 		combined := c.row + ":" + c.column
 		parsedOutput[combined] = append(parsedOutput[combined], values[index])
 	}
 	return parsedOutput, nil
 }
	package utils

	import (
	"fmt"
	"regexp"
	"strings"
	)

	// ParsedOutput holds the parsed output of shell commands whose output
	// takes the form of a table. Each key is a title (a column title, a row
	// title, or a "row:column" combination, depending on the parser that filled
	// the map) and each value is the list of strings parsed for that title.
	type ParsedOutput map[string][]string

	// ParseColumns parses command outputs which are in a column table: the first
	// non-empty row holds the column titles and every following row holds one
	// value per column.
	//
	// The optional titles argument specifies the columns to parse. If it is
	// missing, all columns found in the header row are parsed. The returned map
	// has one entry per parsed title, holding that column's values in row order.
	//
	// Eg,
	//
	//	ParseColumns([]string{
	//		"r        b        swpd      buff",
	//		"10        0    14831128        0"},
	//		"r", "b")
	//	// => map[string][]string{"r": {"10"}, "b": {"0"}}
	//
	// The output needs to have titles on all its columns, else the function
	// returns an error:
	//
	//	ParseColumns([]string{
	//		"              total        used",
	//		"Mem:          14520          12",
	//		"Swap:             0           0"},
	//		"total", "used")
	//	// => err: "row has different number of columns from header row: ..."
	//
	// A requested title that is not in the header row makes the function return
	// an "unknown column title" error when the first data row is processed.
	//
	// Some edge cases that are handled by this function include:
	//
	// Blank rows (including blank rows before the header), which are skipped.
	//
	// Rows that repeat the header, eg with the output of iostat:
	//
	//	[]string{
	//		"Device      tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
	//		"vdb        2.39        57.39        69.83     855576    1041132",
	//		"                                                               ",
	//		"Device      tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
	//		"                                                               ",
	//		"Device      tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn"}
	//	// => map[string][]string{"tps": {"2.39"}, "kB_read/s": {"57.39"}, ...}
	func ParseColumns(rows []string, titles ...string) (map[string][]string, error) {
		parsedOutput := make(map[string][]string)
		// columns maps each column title to its index, eg columns["r"] = 0 with
		// vmstat.
		columns := make(map[string]int)
		// header saves the (trimmed) row that contains the titles. headerSeen
		// records whether that row has been found yet: the header is the first
		// non-empty row, which is not necessarily rows[0].
		var header string
		headerSeen := false

		for _, row := range rows {
			// skip empty lines
			row = strings.TrimSpace(row)
			if row == "" {
				continue
			}
			tokens := strings.Fields(row)

			if !headerSeen {
				headerSeen = true
				header = row
				// if no titles were specified, use all of them
				if len(titles) == 0 {
					titles = tokens
				}
				// map column title to its index eg "r" : 0 with vmstat
				for index, str := range tokens {
					columns[str] = index
				}
				continue
			}
			// a row that repeats the header carries no values, ignore it
			if row == header {
				continue
			}
			// The number of values in a row must equal the number of column
			// titles in the header. Thus trying to parse the rows below returns
			// an error here:
			// "             total        used", (len 2)
			// "Mem:         14520          12", (len 3)
			// "Swap:            0           0", (len 3)
			if len(columns) != len(tokens) {
				return nil, fmt.Errorf("row has different number of columns from header row: \n"+
					"header row: \n %q \n "+
					"current row: \n %q", header, row)
			}

			// For each requested title, look up its column index and append the
			// value found at that index in the current row. Eg, if "r" is the
			// title and the lines to parse were below, then index = 0 and
			// tokens[index] = "5":
			// "r   b   swpd ..."
			// "5   0      0 ..."
			for _, title := range titles {
				index, ok := columns[title]
				if !ok {
					return nil, fmt.Errorf("unknown column title %q", title)
				}
				parsedOutput[title] = append(parsedOutput[title], tokens[index])
			}
		}
		return parsedOutput, nil
	}

	// ParseRows parses command outputs which are in a row table, i.e. every
	// non-empty line has the form "<row title><delim><values>".
	//
	// delim is the string that separates the row title from its values. It must
	// not be empty, and '\n' is not supported as a delimiter for now. Every
	// occurrence of delim in a line is treated as a separator, so a value that
	// itself contains delim is split into several values. The optional titles
	// argument specifies which rows to parse; if it is missing, all rows are
	// parsed. Row titles are matched exactly as they appear in the output
	// (surrounding white space removed).
	//
	// Eg,
	//
	//	ParseRows([]string{"avg-cpu:  %user %nice %system  %iowait %steal  %idle"},
	//		":", "avg-cpu")
	//	// => map[string][]string{
	//	//	"avg-cpu": {"%user", "%nice", "%system", "%iowait", "%steal", "%idle"}}
	//
	// If the wrong delimiter is passed in, the function returns an error:
	//
	//	ParseRows([]string{
	//		"Device  tps   kB_read/s   kB_wrtn/s",
	//		"vdb    1.13       19.48       33.61",
	//		"vda    0.02        0.86        0.00"},
	//		":", "vda")
	//	// => err: `failed to split "Device ..." by the delimiter ":"`
	//
	// A requested title that matches no row yields an "unknown row title" error.
	//
	// Some edge cases parsed by this function include:
	//
	// Rows whose delimiter has white space around it. For example,
	//
	//	[]string{
	//		"processor:     7",
	//		"cpu family:    6"}
	//
	// OR
	//
	//	[]string{
	//		"processor      : 7",
	//		"cpu family     : 6"}
	//
	// both give
	//
	//	map[string][]string{"processor": {"7"}, "cpu family": {"6"}}
	//
	// If the same row title appears more than once, the last occurrence wins.
	func ParseRows(lines []string, delim string, titles ...string) (map[string][]string, error) {
		// An empty delimiter would make strings.Split cut the line into single
		// characters, producing meaningless titles and values.
		if delim == "" {
			return nil, fmt.Errorf("delimiter must not be empty")
		}

		// rows maps each row title to its value(s), which is the rest of the row
		// after the delimiter.
		rows := make(map[string][]string)
		for _, line := range lines {
			// skip empty lines
			line = strings.TrimSpace(line)
			if line == "" {
				continue
			}
			tokens := strings.Split(line, delim)
			// if row did not split, return an error.
			if len(tokens) == 1 {
				return nil, fmt.Errorf("failed to split %q by the delimiter %q", line, delim)
			}
			// removes white space from title
			header := strings.TrimSpace(tokens[0])
			// Remove any extra white space in the values: join everything after
			// the title back into one string and split it on white space. For
			// example, "Architecture:     x86_64" is first split into
			// ["Architecture", "     x86_64"], and the value part then becomes
			// ["x86_64"].
			rows[header] = strings.Fields(strings.Join(tokens[1:], " "))
		}

		// if no titles were passed, every parsed row is part of the result.
		if len(titles) == 0 {
			return rows, nil
		}
		// otherwise pick only the requested rows.
		parsedOutput := make(map[string][]string, len(titles))
		for _, title := range titles {
			values, ok := rows[title]
			if !ok {
				return nil, fmt.Errorf("unknown row title %q", title)
			}
			parsedOutput[title] = values
		}
		return parsedOutput, nil
	}

	// headerLeadingSpace matches a line whose first character is white space. It
	// is used on the column header row to tell whether row 0 column 0 of the
	// table is empty.
	var headerLeadingSpace = regexp.MustCompile(`^\s`)

	// ParseRowsAndColumns parses command outputs that have row and column
	// headers. The first non-empty line is the column header row. In every other
	// non-empty line the first field is the row title (leading and trailing ':'
	// removed) and the remaining fields are the values of that row. If a row
	// title appears more than once, the last occurrence wins.
	//
	// The optional titles argument specifies the row column combinations to
	// parse, each in the format "row:column". If the titles argument is missing,
	// an empty map is returned. The returned map is keyed by "row:column".
	//
	// Eg,
	//
	//	ParseRowsAndColumns([]string{
	//		"       total   used   free   shared",
	//		"Mem:   14520     12   14482        0",
	//		"Swap:      0      0       0        "},
	//		"Mem:used", "Swap:total")
	//	// => map[string][]string{"Mem:used": {"12"}, "Swap:total": {"0"}}
	//
	// The titles should be in the format "row:column". Else, an error is
	// returned:
	//
	//	ParseRowsAndColumns([]string{
	//		"       total   used   free   shared",
	//		"Mem:   14520     12   14482        0",
	//		"Swap:      0      0       0        "},
	//		"Mem+used", "Swap+total")
	//	// => err: "title string not well-formed: ..."
	//
	// An error is also returned when a requested row or column title does not
	// exist in the output, or when the requested row has no value under the
	// requested column (eg "Swap:shared" above).
	//
	// Here are some edge cases parsed by the function:
	//
	// Rows with a non-empty string on row 0 column 0, eg with iostat (the
	// default is an empty string on row 0 column 0):
	//
	//	"Device             tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn",
	//	"vdb               0.74        10.39        23.23     859900    1922916",
	//	"vda               0.01         0.46         0.00      37829          0"
	func ParseRowsAndColumns(lines []string, titles ...string) (map[string][]string, error) {
		// cell is one requested row/column combination.
		type cell struct{ row, column string }

		// Split every title into its row and column part up front, so that a
		// malformed title is reported before any line is parsed.
		cells := make([]cell, 0, len(titles))
		for _, title := range titles {
			headers := strings.Split(strings.Trim(title, ":"), ":")
			if len(headers) != 2 {
				return nil, fmt.Errorf("title string not well-formed: each title should "+
					"be in the form <row>:<column>, where row is the name "+
					"of the row header and column is the name of the "+
					"column header but got %q", title)
			}
			cells = append(cells, cell{row: headers[0], column: headers[1]})
		}

		// columns maps a column title to its index in a row's values, eg
		// columns["total"] = 0 with free.
		columns := make(map[string]int)
		// rows maps each row title to the rest of the row (its values).
		rows := make(map[string][]string)
		// The header is the first non-empty line, not necessarily lines[0].
		headerSeen := false
		for _, line := range lines {
			tokens := strings.Fields(line)
			if len(tokens) == 0 {
				continue
			}

			if !headerSeen {
				headerSeen = true
				// Row values never include the row title, so column indexes
				// must be relative to the first value. With free's output the
				// header starts with white space (row 0 column 0 is empty) and
				// the index of each header token is already correct. With
				// iostat's output the header starts with "Device", which sits
				// above the row titles; every header token is then shifted
				// left by one ("Device" itself ends up at -1 and can never be
				// selected as a value).
				offset := 0
				if !headerLeadingSpace.MatchString(line) {
					offset = -1
				}
				for index, str := range tokens {
					columns[str] = index + offset
				}
				continue
			}
			rHeader := strings.Trim(tokens[0], ":")
			// everything to the right of the row title is its value
			rows[rHeader] = tokens[1:]
		}

		// Resolve each requested cell: fetch the values of its row, then use the
		// index of its column to pick the value. Eg with free's output below:
		// "              total        used        free",
		// "Mem:          14520          13       14482",
		// "Swap:             0           0           0"
		// "Mem:total" reads rows["Mem"] = {"14520", "13", "14482"} at
		// columns["total"] = 0, which gives "14520".
		parsedOutput := make(map[string][]string, len(cells))
		for _, c := range cells {
			values, ok := rows[c.row]
			if !ok {
				return nil, fmt.Errorf("unknown row title %q", c.row)
			}
			index, ok := columns[c.column]
			if !ok {
				return nil, fmt.Errorf("unknown column title %q", c.column)
			}
			// Guards against the row-title column (index -1) and against rows
			// that are shorter than the header.
			if index < 0 || index >= len(values) {
				return nil, fmt.Errorf("row %q has no value for column %q", c.row, c.column)
			}
			// combine the row and column title again when adding to the parsed
			// output map.
			combined := c.row + ":" + c.column
			parsedOutput[combined] = append(parsedOutput[combined], values[index])
		}
		return parsedOutput, nil
	}