# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Python module to draw heat map for Chrome

heat map is a histogram used to analyze the locality of function layout.

This module is used by heat_map.py. HeatmapGenerator is a class to
generate data for drawing heat maps (the actual drawing of heat maps is
performed by another script perf-to-inst-page.sh). It can also analyze
the symbol names in hot pages.
"""

from __future__ import print_function

import bisect
import collections
import os
import pipes
import subprocess

from cros_utils import command_executer


class MMap(object):
  """Class to store mmap information in perf report.

  Useful for cases where there are multiple mmaps for Chrome in the same
  process
  """

  def __init__(self, addr, size, offset):
    self.start_address = addr
    self.size = size
    self.offset = offset

  def __str__(self):
    return '(%x, %x, %x)' % (self.start_address, self.size, self.offset)

  def merge(self, mmap):
    if self.start_address == mmap.start_address:
      assert self.start_address == mmap.start_address and \
      self.size == mmap.size, 'The process mmap is copied from forking'
      return

    # This case occurs when two mmaps are found for the same process.
    # Observed before but never seen since. Turn if off now
    assert self.start_address == mmap.start_address, \
    'There should only be one mmap for each process'

    # Assume the smaller address is for rodata
    # Use the larger address for text segment
    assert self.start_address < mmap.start_address, \
    'Assume always mmap larger address later for the same process'

    # Assumes larger address is text segment
    self.start_address = mmap.start_address
    self.size = mmap.size


class HeatmapGenerator(object):
  """Class to generate heat map with a perf report, containing mmaps and

  samples. This class contains two interfaces with other modules:
  draw() and analyze().

  draw() draws a heatmap with the sample information given in the perf report
  analyze() prints out the symbol names in hottest pages with the given
  chrome binary
  """

  def __init__(self, perf_report, page_size, title, log_level='verbose'):
    self.perf_report = perf_report
    # Pick 1G as a relatively large number. All addresses less than it will
    # be recorded. The actual heatmap will show up to a boundary of the
    # largest address in text segment.
    self.max_addr = 1024 * 1024 * 1024
    self.ce = command_executer.GetCommandExecuter(log_level=log_level)
    self.dir = os.path.dirname(os.path.realpath(__file__))
    with open(perf_report) as f:
      self.perf_report_contents = f.readlines()
    # Write histogram results to a text file, in order to use gnu plot to draw
    self.hist_temp_output = open('out.txt', 'w')
    self.processes = {}
    self.deleted_processes = {}
    self.count = 0
    self.page_size = page_size
    self.title = title
    self.symbol_addresses = []
    self.symbol_names = []
    # Set huge page region of Chrome to be first 30MB, only used when printing
    # out hottest pages
    self.huge_page_size = 30 * 1024 * 1024

  def _parse_perf_sample(self, line):
    # In a perf report, generated with -D, a PERF_RECORD_SAMPLE command should
    # look like this: TODO: some arguments are unknown
    #
    # cpuid cycle unknown [unknown]: PERF_RECORD_SAMPLE(IP, 0x2): pid/tid:
    # 0xaddr period: period addr: addr
    # ... thread: threadname:tid
    # ...... dso: process
    #
    # This is an example:
    # 1 136712833349 0x6a558 [0x30]: PERF_RECORD_SAMPLE(IP, 0x2): 5227/5227:
    # 0x55555683b810 period: 372151 addr: 0
    # ... thread: chrome:5227
    # ...... dso: /opt/google/chrome/chrome
    #
    # For this function, the 7th argument (args[6]) after spltting with spaces
    # is pid/tid. We use the combination of the two as the pid.
    # Also, we add an assertion here to check the tid in the 7th argument(
    # args[6]) and the 15th argument(arg[14]) are the same
    #
    # The function returns the ((pid,tid), address) pair if the sampling
    # is on Chrome. Otherwise, return (None, None) pair.

    if 'thread: chrome' not in line or \
    'dso: /opt/google/chrome/chrome' not in line:
      return None, None
    args = line.split(' ')
    pid_raw = args[6].split('/')
    assert pid_raw[1][:-1] == args[14].split(':')[1][:-1], \
    'TID in %s of sample is not the same: %s/%s' % (
        line[:-1], pid_raw[1][:-1], args[14].split(':')[1][:-1])
    key = (int(pid_raw[0]), int(pid_raw[1][:-1]))
    address = int(args[7], base=16)
    return key, address

  def _parse_perf_record(self, line):
    # In a perf report, generated with -D, a PERF_RECORD_MMAP2 command should
    # look like this: TODO: some arguments are unknown
    #
    # cpuid cycle unknown [unknown]: PERF_RECORD_MMAP2 pid/tid:
    # [0xaddr(0xlength) @ pageoffset maj:min ino ino_generation]:
    # permission process
    #
    # This is an example.
    # 2 136690556823 0xa6898 [0x80]: PERF_RECORD_MMAP2 5227/5227:
    # [0x555556496000(0x8d1b000) @ 0xf42000 b3:03 92844 1892514370]:
    # r-xp /opt/google/chrome/chrome
    #
    # For this function, the 6th argument (args[5]) after spltting with spaces
    # is pid/tid. We use the combination of the two as the pid.
    # The 7th argument (args[6]) is the [0xaddr(0xlength). We can peel the
    # string to get the address and size of the mmap.
    # The 9th argument (args[8]) is the page offset.
    # The function returns the ((pid,tid), mmap) pair if the mmap is for Chrome
    # is on Chrome. Otherwise, return (None, None) pair.

    if 'chrome/chrome' not in line:
      return None, None
    args = line.split(' ')
    pid_raw = args[5].split('/')
    assert pid_raw[0] == pid_raw[1][:-1], \
    'PID in %s of mmap is not the same: %s/%s' % (
        line[:-1], pid_raw[0], pid_raw[1])
    pid = (int(pid_raw[0]), int(pid_raw[1][:-1]))
    address_raw = args[6].split('(')
    start_address = int(address_raw[0][1:], base=16)
    size = int(address_raw[1][:-1], base=16)
    offset = int(args[8][:-2], base=16)
    # Return an mmap object instead of only starting address,
    # in case there are many mmaps for the sample PID
    return pid, MMap(start_address, size, offset)

  def _parse_pair_event(self, arg):
    # This function is called by the _parse_* functions that has a pattern of
    # pids like: (pid:tid):(pid:tid), i.e.
    # PERF_RECORD_FORK and PERF_RECORD_COMM
    _, remain = arg.split('(', 1)
    pid1, remain = remain.split(':', 1)
    pid2, remain = remain.split(')', 1)
    _, remain = remain.split('(', 1)
    pid3, remain = remain.split(':', 1)
    pid4, remain = remain.split(')', 1)
    return (int(pid1), int(pid2)), (int(pid3), int(pid4))

  def _process_perf_record(self, line):
    # This function calls _parse_perf_record() to get information from
    # PERF_RECORD_MMAP2. It records the mmap object for each pid (a pair of
    # pid,tid), into a dictionary.
    pid, mmap = self._parse_perf_record(line)
    if pid is None:
      # PID = None meaning the mmap is not for chrome
      return
    if pid in self.processes:
      self.processes[pid].merge(mmap)
    else:
      self.processes[pid] = mmap

  def _process_perf_fork(self, line):
    # In a perf report, generated with -D, a PERF_RECORD_FORK command should
    # look like this:
    #
    # cpuid cycle unknown [unknown]:
    # PERF_RECORD_FORK(pid_to:tid_to):(pid_from:tid_from)
    #
    # This is an example.
    # 0 0 0x22a8 [0x38]: PERF_RECORD_FORK(1:1):(0:0)
    #
    # In this function, we need to peel the information of pid:tid pairs
    # So we get the last argument and send it to function _parse_pair_event()
    # for analysis.
    # We use (pid, tid) as the pid.
    args = line.split(' ')
    pid_to, pid_from = self._parse_pair_event(args[-1])
    if pid_from in self.processes:
      assert pid_to not in self.processes
      self.processes[pid_to] = MMap(self.processes[pid_from].start_address,
                                    self.processes[pid_from].size,
                                    self.processes[pid_from].offset)

  def _process_perf_exit(self, line):
    # In a perf report, generated with -D, a PERF_RECORD_EXIT command should
    # look like this:
    #
    # cpuid cycle unknown [unknown]:
    # PERF_RECORD_EXIT(pid1:tid1):(pid2:tid2)
    #
    # This is an example.
    # 1 136082505621 0x30810 [0x38]: PERF_RECORD_EXIT(3851:3851):(3851:3851)
    #
    # In this function, we need to peel the information of pid:tid pairs
    # So we get the last argument and send it to function _parse_pair_event()
    # for analysis.
    # We use (pid, tid) as the pid.
    args = line.split(' ')
    pid_to, pid_from = self._parse_pair_event(args[-1])
    assert pid_to == pid_from, '(%d, %d) (%d, %d)' % (pid_to[0], pid_to[1],
                                                      pid_from[0], pid_from[1])
    if pid_to in self.processes:
      # Don't delete the process yet
      self.deleted_processes[pid_from] = self.processes[pid_from]

  def _process_perf_sample(self, line):
    # This function calls _parse_perf_sample() to get information from
    # the perf report.
    # It needs to check the starting address of allocated mmap from
    # the dictionary (self.processes) to calculate the offset within
    # the text section of the sampling.
    # The offset is calculated into pages (4KB or 2MB) and writes into
    # out.txt together with the total counts, which will be used to
    # calculate histogram.
    pid, addr = self._parse_perf_sample(line)
    if pid is None:
      return

    assert pid in self.processes and pid not in self.deleted_processes, \
    'PID %d not found mmap and not forked from another process'

    start_address = self.processes[pid].start_address
    address = addr - start_address
    assert address >= 0 and \
    'addresses accessed in PERF_RECORD_SAMPLE should be larger than' \
    ' the starting address of Chrome'
    if address < self.max_addr:
      self.count += 1
      print(('%d/%d: %d %d' % (pid[0], pid[1], self.count,
                               address / self.page_size * self.page_size)),
            file=self.hist_temp_output)

  def _read_perf_report(self):
    # Serve as main function to read perf report, generated by -D
    lines = iter(self.perf_report_contents)
    for line in lines:
      if 'PERF_RECORD_MMAP' in line:
        self._process_perf_record(line)
      elif 'PERF_RECORD_FORK' in line:
        self._process_perf_fork(line)
      elif 'PERF_RECORD_EXIT' in line:
        self._process_perf_exit(line)
      elif 'PERF_RECORD_SAMPLE' in line:
        # Perf sample is multi-line
        self._process_perf_sample(line + next(lines) + next(lines))
    self.hist_temp_output.close()

  def _draw_heat_map(self):
    # Calls a script (perf-to-inst-page.sh) to calculate histogram
    # of results written in out.txt and also generate pngs for
    # heat maps.
    heatmap_script = os.path.join(self.dir, 'perf-to-inst-page.sh')
    cmd = '{0} {1}'.format(heatmap_script, pipes.quote(self.title))
    retval = self.ce.RunCommand(cmd)
    if retval:
      raise RuntimeError('Failed to run script to generate heatmap')

  def _restore_histogram(self, name):
    hist = {}
    with open(name) as f:
      for l in f.readlines():
        num, addr = l.strip().split(' ')
        hist[int(addr)] = int(num)
    return hist

  def _read_symbols_from_binary(self, binary):
    # FIXME: We are using nm to read symbol names from Chrome binary
    # for now. Can we get perf to hand us symbol names, instead of
    # using nm in the future?
    #
    # Get all the symbols (and their starting addresses) that fall into
    # the page. Will be used to print out information of hot pages
    # Each line shows the information of a symbol:
    # [symbol value (0xaddr)] [symbol type] [symbol name]
    # For some symbols, the [symbol name] field might be missing.
    # e.g.
    # 0000000001129da0 t Builtins_LdaNamedPropertyHandler

    # Generate a list of symbols from nm tool and check each line
    # to extract symbols names
    text_section_start = 0
    for l in subprocess.check_output(['nm', '-n', binary]).split('\n'):
      args = l.strip().split(' ')
      if len(args) < 3:
        # No name field
        continue
      addr_raw, symbol_type, name = args
      addr = int(addr_raw, base=16)
      if 't' not in symbol_type and 'T' not in symbol_type:
        # Filter out symbols not in text sections
        continue
      if len(self.symbol_addresses) == 0:
        # The first symbol in text sections
        text_section_start = addr
        self.symbol_addresses.append(0)
        self.symbol_names.append(name)
      else:
        assert text_section_start != 0, \
        'The starting address of text section has not been found'
        if addr == self.symbol_addresses[-1]:
          # if the same address has multiple symbols, put them together
          # and separate symbol names with '/'
          self.symbol_names[-1] += '/' + name
        else:
          # The output of nm -n command is already sorted by address
          # Insert to the end will result in a sorted array for bisect
          self.symbol_addresses.append(addr - text_section_start)
          self.symbol_names.append(name)

  def _get_list_of_pages_to_show(self, hist, top_n):
    sorted_hist = sorted(
        hist.iteritems(), key=lambda value: value[1], reverse=True)
    _, max_count = sorted_hist[0]

    # Depending on the configuration of top_n, select pages in the list
    # if % is in top_n, e.g. top_n = '20%', we will select the pages that has
    # sample count larger than 20% of the peak amount
    # otherwise, if top_n is a number, e.g. top_n = 5, we will select top 5
    # hottest pages within the 30MB region and top 5 hottest pages outside of
    # the 30MB region

    if '%' in top_n:
      count_threshold = max_count * int(top_n[:-1]) / 100
      list_to_show = [(k, v) for (k, v) in sorted_hist if v >= count_threshold]
    else:
      in_huge_page = [
          (k, v) for (k, v) in sorted_hist if k < self.huge_page_size
      ][:int(top_n)]
      outside_huge_page = [
          (k, v) for (k, v) in sorted_hist if k >= self.huge_page_size
      ][:int(top_n)]

      list_to_show = in_huge_page + outside_huge_page
    return list_to_show, max_count

  def _map_addr_to_symbol(self, addr):
    # Find out the symbol name
    assert len(self.symbol_addresses) > 0
    index = bisect.bisect(self.symbol_addresses, addr)
    assert index > 0 and index <= len(self.symbol_names), \
    'Failed to find an index (%d) in the list (len=%d)' % (
        index, len(self.symbol_names))
    return self.symbol_names[index - 1]

  def _get_symbols_in_hot_pages(self, fp, pages_to_show, max_count):
    # Print symbols in all the pages of interest
    for page_num, sample_num in pages_to_show:
      print(
          '----------------------------------------------------------', file=fp)
      print(('Page Offset: %d MB, Count: %d (%.1f%%)' % (
          page_num / 1024 / 1024, sample_num, 100.0 * sample_num / max_count)),
            file=fp)

      symbol_counts = collections.Counter()
      # Read Sample File and find out the occurance of symbols in the page
      lines = iter(self.perf_report_contents)
      for line in lines:
        if 'PERF_RECORD_SAMPLE' in line:
          pid, addr = self._parse_perf_sample(line + next(lines) + next(lines))
          lines.next()
          lines.next()
          if pid is None:
            # The sampling is not on Chrome
            continue
          if addr / self.page_size != (
              self.processes[pid].start_address + page_num) / self.page_size:
            # Sampling not in the current page
            continue

          name = self._map_addr_to_symbol(addr -
                                          self.processes[pid].start_address)
          assert name, 'Failed to find symbol name of addr %x' % addr
          symbol_counts[name] += 1

      assert sum(symbol_counts.itervalues()) == sample_num, \
      'Symbol name matching missing for some addresses: %d vs %d' % (
          sum(symbol_counts.itervalues()), sample_num)

      # Print out the symbol names sorted by the number of samples in
      # the page
      for name, count in sorted(
          symbol_counts.iteritems(), key=lambda kv: kv[1], reverse=True):
        if count == 0:
          break
        print('> %s : %d' % (name, count), file=fp)
      print('\n\n', file=fp)

  def draw(self):
    # First read perf report to process information and save histogram
    # into a text file
    self._read_perf_report()
    # Then use gnu plot to draw heat map
    self._draw_heat_map()

  def analyze(self, binary, top_n='10'):
    # Read histogram from histo.txt
    hist = self._restore_histogram('inst-histo.txt')
    # Generate Symbol Names and save it to nm.txt
    self._read_symbols_from_binary(binary)
    # Sort the pages according to the hotness
    pages_to_show, max_count = self._get_list_of_pages_to_show(hist, top_n)

    # Write hottest pages
    with open('addr2symbol.txt', 'w') as fp:
      self._get_symbols_in_hot_pages(fp, pages_to_show, max_count)
