Added support for histogram creation.

Added argument for histogram creation.
Added function to streamline histogram generation.
Added function to generate all histograms.

BUG=chromium:1103853
TEST=None

Change-Id: Id489ecd9e9b3faaa7960cbae630b690a1400f6aa
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/2374125
Commit-Queue: Kevin Ruvalcaba <kevinruvalcaba@google.com>
Tested-by: Kevin Ruvalcaba <kevinruvalcaba@google.com>
Reviewed-by: Alex Klein <saklein@chromium.org>
diff --git a/contrib/depgraph_visualization/README.md b/contrib/depgraph_visualization/README.md
index 1bf452c..1b8fc7b 100644
--- a/contrib/depgraph_visualization/README.md
+++ b/contrib/depgraph_visualization/README.md
@@ -1,4 +1,4 @@
-# Depgraph visualizer
+# Depgraph Visualizer
 
 ## Installation
 Enter your SDK, go to this modules location,
@@ -10,6 +10,7 @@
 cd ~/trunk/chromite/contrib/depgraph_visualization
 ./install.sh
 source my_visualizations/bin/activate
+cd my_visualizations
 ```
 
 This will put a script called `visualize_depgraph` on your PATH. From there you
@@ -41,6 +42,16 @@
 ```bash
 visualize_depgraph net-fs/samba -b=amd64-generic --output-path=bar/foo --output-name=SambaGraph
 ```
+## Secondary usage
+With the argument `--include-histograms` you can also generate four png files
+with histograms for dependency and reverse dependency distribution.
+Each distribution is split into two plots because the number of packages in a
+given range of (reverse) dependencies goes from 600 down to 1.
+```bash
+visualize_depgraph --include-histograms=True --output-path=foo/bar -b=arm64-generic
+```
+These files are saved in the same directory as the main output file.
+
 ## Important notes
 
 If you were to use a package with no dependencies
diff --git a/contrib/depgraph_visualization/depgraph_visualization/depgraph_viz.py b/contrib/depgraph_visualization/depgraph_visualization/depgraph_viz.py
index 6f03c7d..581ca2f 100644
--- a/contrib/depgraph_visualization/depgraph_visualization/depgraph_viz.py
+++ b/contrib/depgraph_visualization/depgraph_visualization/depgraph_viz.py
@@ -27,6 +27,8 @@
                       help='Write output to the given path.')
   parser.add_argument('--output-name', default='DepGraph',
                       help='Write output file name.')
+  parser.add_argument('--include-histograms', default=False,
+                      help='Create and save histograms about dependencies.')
   parser.add_argument('pkgs', nargs='*', default=_DEFAULT_PACKAGES)
   opts = parser.parse_args(argv)
   opts.Freeze()
@@ -77,3 +79,5 @@
   runtime_tree = CreateRuntimeTree(sysroot, opts.pkgs)
   dep_vis = visualize.DepVisualizer(runtime_tree)
   dep_vis.VisualizeGraph(output_name=out_name, output_dir=out_dir)
+  if opts.include_histograms:
+    dep_vis.GenerateHistograms(opts.build_target, out_dir)
diff --git a/contrib/depgraph_visualization/depgraph_visualization/visualize.py b/contrib/depgraph_visualization/depgraph_visualization/visualize.py
index 16059de..01ff291 100644
--- a/contrib/depgraph_visualization/depgraph_visualization/visualize.py
+++ b/contrib/depgraph_visualization/depgraph_visualization/visualize.py
@@ -10,6 +10,7 @@
 """
 
 from typing import Dict, Iterator, List, Set, Tuple
+import matplotlib.pyplot as plt # pylint: disable=import-error
 
 
 class PackageNode(object):
@@ -145,6 +146,97 @@
     # Writes an HTML file with the graph on it.
     net.write_html(f'{output_dir}/{output_name}.html')
 
+  def GenerateHistograms(self, build_name: str, path: str):
+    """Creates 4 histograms of dependency and reverse dependency distribution.
+
+    The number of packages within a given range of dependencies or reverse
+    dependencies ranges from 600 to 1, so the histogram of each is split
+    into a low and a high range, giving us four in total.
+
+    Args:
+      build_name: Name of the build target, used in plot titles and file names.
+      path: Directory where the png files are saved.
+    """
+    # Number of dependencies of every node in self.pkg_dict.
+    dep_count = [len(n.dependencies) for n in self.pkg_dict.values()]
+    # There is no deeper rationale for the values of these bins other than
+    # that they yield a good result.
+    dep_bins_low = [0, 1, 3, 5, 9, 13, 17, 20]
+    # High bins: 21, then multiples of 50, with the highest count as last edge.
+    # NOTE(review): the edges are not increasing if highest_dep < 21 -- verify.
+    highest_dep = max(dep_count)
+    top = (highest_dep // 50)+1
+    dep_bins_high = [21] + [i*50 for i in range(1, top)] + [highest_dep]
+
+    # Plot and save the low-range and high-range dependency histograms.
+    _SaveHistogram(dep_count,
+                   dep_bins_low,
+                   f'({build_name}): Dependency_distribution_low',
+                   path,
+                   '#205973')
+
+    _SaveHistogram(dep_count,
+                   dep_bins_high,
+                   f'({build_name}): Dependency_distribution_high',
+                   path,
+                   '#205973')
+
+    # Repeat for the reverse dependencies, with their own low bin edges.
+    rvs_count = [len(n.rvs_dependencies) for n in self.pkg_dict.values()]
+    rvs_bins_low = [1, 2, 10, 20]
+    highest_rvs = max(rvs_count)
+    top = (highest_rvs // 50)+1
+    rvs_bins_high = [21] + [i*50 for i in range(1, top)] + [highest_rvs]
+
+    _SaveHistogram(rvs_count,
+                   rvs_bins_low,
+                   f'({build_name}): Reverse_Dependency_distribution_low',
+                   path,
+                   '#ef7e56')
+
+    _SaveHistogram(rvs_count,
+                   rvs_bins_high,
+                   f'({build_name}): Reverse_Dependency_distribution_high',
+                   path,
+                   '#ef7e56')
+
+
+def _SaveHistogram(data: List[int],
+                   bins: List[int],
+                   name: str,
+                   path: str,
+                   color: str):
+  """Plots one histogram and saves it as a png file.
+
+  The file is written to f'{path}/{name}.png' and the figure is then cleared.
+
+  Args:
+    data: List with data points.
+    bins: List with the bin edges for the histogram.
+    name: Title of the plot, also used as the output file name.
+    path: Directory of the output file.
+    color: Color in either rgb or hexadecimal format.
+  """
+
+  plt.hist(data,
+           bins=bins,
+           edgecolor='black',
+           color=[color])
+
+  # Place each x-tick at the integer midpoint of its bin.
+  xplace = [(a+b) // 2 for a, b in zip(bins[:-1], bins[1:])]
+  # Label every bin except the last one as 'low-(high-1)'.
+  xlabels = [f'{a}-{b-1}' for a, b in zip(bins[:-2], bins[1:-1])]
+  # The last bin is labelled with both of its edges since it is inclusive.
+  xlabels += [f'{bins[-2]}-{bins[-1]}']
+  plt.xticks(xplace, xlabels)
+
+  plt.ylabel('Number of Packages')
+  plt.title(name)
+  plt.savefig(f'{path}/{name}.png')
+  # Clear the figure, otherwise successive histograms clump together.
+  plt.clf()
+
 
 def _BfsColoring(net,
                  queue: List[Iterator[PackageNode]],
diff --git a/contrib/depgraph_visualization/requirements.txt b/contrib/depgraph_visualization/requirements.txt
index be25b9e..ebbf13c 100644
--- a/contrib/depgraph_visualization/requirements.txt
+++ b/contrib/depgraph_visualization/requirements.txt
@@ -1,3 +1,4 @@
 --find-links file:///mnt/host/source/infra_virtualenv/pip_packages
 portage>=2.3
+matplotlib==3.3.0
 pyvis==0.1.*