| From b3f84fb3650b32adc89b89384887b977a999a9f6 Mon Sep 17 00:00:00 2001 |
| From: James Clark <james.clark@arm.com> |
| Date: Mon, 10 May 2021 17:32:48 +0300 |
| Subject: [PATCH 1/2] perf cs-etm: Set time on synthesised samples to preserve |
| ordering |
| |
| The following attribute is set when synthesising samples in |
| timed decoding mode: |
| |
| attr.sample_type |= PERF_SAMPLE_TIME; |
| |
| This results in new samples that appear to have timestamps but |
| because we don't assign any timestamps to the samples, when the |
| resulting inject file is opened again, the synthesised samples |
| will be on the wrong side of the MMAP or COMM events. |
| |
| For example, this results in the samples being associated with |
| the perf binary, rather than the target of the record: |
| |
| perf record -e cs_etm/@tmc_etr0/u top |
| perf inject -i perf.data -o perf.inject --itrace=i100il |
| perf report -i perf.inject |
| |
| Where 'Command' == perf should show as 'top': |
| |
| # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles |
| # ........ ....... .................... ...................... ...................... .................. |
| # |
| 31.08% perf [unknown] [.] 0x000000000040c3f8 [.] 0x000000000040c3e8 - |
| |
| If the perf.data file is opened directly with perf, without the |
| inject step, then this already works correctly because the |
| events are synthesised after the COMM and MMAP events and |
| no second sorting happens. Re-sorting only happens when opening |
| the perf.inject file for the second time so timestamps are |
| needed. |
| |
| Using the timestamp from the AUX record mirrors the current |
| behaviour when opening directly with perf, because the events |
| are generated on the call to cs_etm__process_queues(). |
| |
| The ETM trace could optionally contain time stamps, but there is |
| no way to correlate this with the kernel time. So, the best available |
| time value is that of the AUX_RECORD header. This patch uses |
| the timestamp from the header for all the samples. The ordering of the |
| samples are implicit in the trace and thus is fine with respect to |
| relative ordering. |
| |
| Reviewed-by: Leo Yan <leo.yan@linaro.org> |
| Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org> |
| Co-developed-by: Al Grant <al.grant@arm.com> |
| Signed-off-by: Al Grant <al.grant@arm.com> |
| Signed-off-by: James Clark <james.clark@arm.com> |
| Acked-by: Suzuki K Poulos <suzuki.poulose@arm.com> |
| Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> |
| Cc: Anshuman Khandual <anshuman.khandual@arm.com> |
| Cc: Branislav Rankov <branislav.rankov@arm.com> |
| Cc: Denis Nikitin <denik@chromium.org> |
| Cc: Jiri Olsa <jolsa@redhat.com> |
| Cc: John Garry <john.garry@huawei.com> |
| Cc: Mark Rutland <mark.rutland@arm.com> |
| Cc: Mike Leach <mike.leach@linaro.org> |
| Cc: Namhyung Kim <namhyung@kernel.org> |
| Cc: Will Deacon <will@kernel.org> |
| Cc: linux-arm-kernel@lists.infradead.org |
| Cc: coresight@lists.linaro.org |
| Link: https://lore.kernel.org/r/20210510143248.27423-3-james.clark@arm.com |
| Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> |
| --- |
| tools/perf/util/cs-etm.c | 15 +++++++++++++-- |
| 1 file changed, 13 insertions(+), 2 deletions(-) |
| |
| diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c |
| index ad83a97..a557f83 100644 |
| --- a/tools/perf/util/cs-etm.c |
| +++ b/tools/perf/util/cs-etm.c |
| @@ -52,6 +52,7 @@ struct cs_etm_auxtrace { |
| u8 sample_instructions; |
| |
| int num_cpu; |
| + u64 latest_kernel_timestamp; |
| u32 auxtrace_type; |
| u64 branches_sample_type; |
| u64 branches_id; |
| @@ -1148,6 +1149,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, |
| event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); |
| event->sample.header.size = sizeof(struct perf_event_header); |
| |
| + if (!etm->timeless_decoding) |
| + sample.time = etm->latest_kernel_timestamp; |
| sample.ip = addr; |
| sample.pid = tidq->pid; |
| sample.tid = tidq->tid; |
| @@ -1202,6 +1205,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, |
| event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); |
| event->sample.header.size = sizeof(struct perf_event_header); |
| |
| + if (!etm->timeless_decoding) |
| + sample.time = etm->latest_kernel_timestamp; |
| sample.ip = ip; |
| sample.pid = tidq->pid; |
| sample.tid = tidq->tid; |
| @@ -2355,9 +2360,15 @@ static int cs_etm__process_event(struct perf_session *session, |
| else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) |
| return cs_etm__process_switch_cpu_wide(etm, event); |
| |
| - if (!etm->timeless_decoding && |
| - event->header.type == PERF_RECORD_AUX) |
| + if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) { |
| + /* |
| + * Record the latest kernel timestamp available in the header |
| + * for samples so that synthesised samples occur from this point |
| + * onwards. |
| + */ |
| + etm->latest_kernel_timestamp = sample_kernel_timestamp; |
| return cs_etm__process_queues(etm); |
| + } |
| |
| return 0; |
| } |
| -- |
| 2.31.0 |
| |