// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2018 Intel Corporation
 */
|  |  | 
|  | #include <linux/sort.h> | 
|  |  | 
|  | #include "intel_gpu_commands.h" | 
|  | #include "intel_gt_pm.h" | 
|  | #include "intel_rps.h" | 
|  |  | 
|  | #include "i915_selftest.h" | 
|  | #include "selftests/igt_flush_test.h" | 
|  |  | 
|  | #define COUNT 5 | 
|  |  | 
|  | static int cmp_u32(const void *A, const void *B) | 
|  | { | 
|  | const u32 *a = A, *b = B; | 
|  |  | 
|  | return *a - *b; | 
|  | } | 
|  |  | 
|  | static void perf_begin(struct intel_gt *gt) | 
|  | { | 
|  | intel_gt_pm_get(gt); | 
|  |  | 
|  | /* Boost gpufreq to max [waitboost] and keep it fixed */ | 
|  | atomic_inc(>->rps.num_waiters); | 
|  | schedule_work(>->rps.work); | 
|  | flush_work(>->rps.work); | 
|  | } | 
|  |  | 
|  | static int perf_end(struct intel_gt *gt) | 
|  | { | 
|  | atomic_dec(>->rps.num_waiters); | 
|  | intel_gt_pm_put(gt); | 
|  |  | 
|  | return igt_flush_test(gt->i915); | 
|  | } | 
|  |  | 
/*
 * Emit commands into @rq that store the engine's CS timestamp register
 * into the request's timeline hardware status page at index @slot.
 * The written u32 can later be read back via rq->hwsp_seqno[slot].
 *
 * Returns 0 on success, or a negative error code if ring space could
 * not be reserved.
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	/* Safe to dereference directly while the request is not signaled. */
	struct intel_timeline *tl =
		rcu_dereference_protected(rq->timeline,
					  !i915_request_signaled(rq));
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	/* Gen8+ SRM carries an extra address dword; bump the length field. */
	if (GRAPHICS_VER(rq->engine->i915) >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = tl->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}
|  |  | 
|  | static struct i915_vma *create_empty_batch(struct intel_context *ce) | 
|  | { | 
|  | struct drm_i915_gem_object *obj; | 
|  | struct i915_vma *vma; | 
|  | u32 *cs; | 
|  | int err; | 
|  |  | 
|  | obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); | 
|  | if (IS_ERR(obj)) | 
|  | return ERR_CAST(obj); | 
|  |  | 
|  | cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); | 
|  | if (IS_ERR(cs)) { | 
|  | err = PTR_ERR(cs); | 
|  | goto err_put; | 
|  | } | 
|  |  | 
|  | cs[0] = MI_BATCH_BUFFER_END; | 
|  |  | 
|  | i915_gem_object_flush_map(obj); | 
|  |  | 
|  | vma = i915_vma_instance(obj, ce->vm, NULL); | 
|  | if (IS_ERR(vma)) { | 
|  | err = PTR_ERR(vma); | 
|  | goto err_unpin; | 
|  | } | 
|  |  | 
|  | err = i915_vma_pin(vma, 0, 0, PIN_USER); | 
|  | if (err) | 
|  | goto err_unpin; | 
|  |  | 
|  | i915_gem_object_unpin_map(obj); | 
|  | return vma; | 
|  |  | 
|  | err_unpin: | 
|  | i915_gem_object_unpin_map(obj); | 
|  | err_put: | 
|  | i915_gem_object_put(obj); | 
|  | return ERR_PTR(err); | 
|  | } | 
|  |  | 
|  | static u32 trifilter(u32 *a) | 
|  | { | 
|  | u64 sum; | 
|  |  | 
|  | sort(a, COUNT, sizeof(*a), cmp_u32, NULL); | 
|  |  | 
|  | sum = mul_u32_u32(a[2], 2); | 
|  | sum += a[1]; | 
|  | sum += a[3]; | 
|  |  | 
|  | return sum >> 2; | 
|  | } | 
|  |  | 
|  | static int perf_mi_bb_start(void *arg) | 
|  | { | 
|  | struct intel_gt *gt = arg; | 
|  | struct intel_engine_cs *engine; | 
|  | enum intel_engine_id id; | 
|  | int err = 0; | 
|  |  | 
|  | if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ | 
|  | return 0; | 
|  |  | 
|  | perf_begin(gt); | 
|  | for_each_engine(engine, gt, id) { | 
|  | struct intel_context *ce = engine->kernel_context; | 
|  | struct i915_vma *batch; | 
|  | u32 cycles[COUNT]; | 
|  | int i; | 
|  |  | 
|  | intel_engine_pm_get(engine); | 
|  |  | 
|  | batch = create_empty_batch(ce); | 
|  | if (IS_ERR(batch)) { | 
|  | err = PTR_ERR(batch); | 
|  | intel_engine_pm_put(engine); | 
|  | break; | 
|  | } | 
|  |  | 
|  | err = i915_vma_sync(batch); | 
|  | if (err) { | 
|  | intel_engine_pm_put(engine); | 
|  | i915_vma_put(batch); | 
|  | break; | 
|  | } | 
|  |  | 
|  | for (i = 0; i < ARRAY_SIZE(cycles); i++) { | 
|  | struct i915_request *rq; | 
|  |  | 
|  | rq = i915_request_create(ce); | 
|  | if (IS_ERR(rq)) { | 
|  | err = PTR_ERR(rq); | 
|  | break; | 
|  | } | 
|  |  | 
|  | err = write_timestamp(rq, 2); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | err = rq->engine->emit_bb_start(rq, | 
|  | batch->node.start, 8, | 
|  | 0); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | err = write_timestamp(rq, 3); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | out: | 
|  | i915_request_get(rq); | 
|  | i915_request_add(rq); | 
|  |  | 
|  | if (i915_request_wait(rq, 0, HZ / 5) < 0) | 
|  | err = -EIO; | 
|  | i915_request_put(rq); | 
|  | if (err) | 
|  | break; | 
|  |  | 
|  | cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; | 
|  | } | 
|  | i915_vma_put(batch); | 
|  | intel_engine_pm_put(engine); | 
|  | if (err) | 
|  | break; | 
|  |  | 
|  | pr_info("%s: MI_BB_START cycles: %u\n", | 
|  | engine->name, trifilter(cycles)); | 
|  | } | 
|  | if (perf_end(gt)) | 
|  | err = -EIO; | 
|  |  | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static struct i915_vma *create_nop_batch(struct intel_context *ce) | 
|  | { | 
|  | struct drm_i915_gem_object *obj; | 
|  | struct i915_vma *vma; | 
|  | u32 *cs; | 
|  | int err; | 
|  |  | 
|  | obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); | 
|  | if (IS_ERR(obj)) | 
|  | return ERR_CAST(obj); | 
|  |  | 
|  | cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); | 
|  | if (IS_ERR(cs)) { | 
|  | err = PTR_ERR(cs); | 
|  | goto err_put; | 
|  | } | 
|  |  | 
|  | memset(cs, 0, SZ_64K); | 
|  | cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; | 
|  |  | 
|  | i915_gem_object_flush_map(obj); | 
|  |  | 
|  | vma = i915_vma_instance(obj, ce->vm, NULL); | 
|  | if (IS_ERR(vma)) { | 
|  | err = PTR_ERR(vma); | 
|  | goto err_unpin; | 
|  | } | 
|  |  | 
|  | err = i915_vma_pin(vma, 0, 0, PIN_USER); | 
|  | if (err) | 
|  | goto err_unpin; | 
|  |  | 
|  | i915_gem_object_unpin_map(obj); | 
|  | return vma; | 
|  |  | 
|  | err_unpin: | 
|  | i915_gem_object_unpin_map(obj); | 
|  | err_put: | 
|  | i915_gem_object_put(obj); | 
|  | return ERR_PTR(err); | 
|  | } | 
|  |  | 
|  | static int perf_mi_noop(void *arg) | 
|  | { | 
|  | struct intel_gt *gt = arg; | 
|  | struct intel_engine_cs *engine; | 
|  | enum intel_engine_id id; | 
|  | int err = 0; | 
|  |  | 
|  | if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ | 
|  | return 0; | 
|  |  | 
|  | perf_begin(gt); | 
|  | for_each_engine(engine, gt, id) { | 
|  | struct intel_context *ce = engine->kernel_context; | 
|  | struct i915_vma *base, *nop; | 
|  | u32 cycles[COUNT]; | 
|  | int i; | 
|  |  | 
|  | intel_engine_pm_get(engine); | 
|  |  | 
|  | base = create_empty_batch(ce); | 
|  | if (IS_ERR(base)) { | 
|  | err = PTR_ERR(base); | 
|  | intel_engine_pm_put(engine); | 
|  | break; | 
|  | } | 
|  |  | 
|  | err = i915_vma_sync(base); | 
|  | if (err) { | 
|  | i915_vma_put(base); | 
|  | intel_engine_pm_put(engine); | 
|  | break; | 
|  | } | 
|  |  | 
|  | nop = create_nop_batch(ce); | 
|  | if (IS_ERR(nop)) { | 
|  | err = PTR_ERR(nop); | 
|  | i915_vma_put(base); | 
|  | intel_engine_pm_put(engine); | 
|  | break; | 
|  | } | 
|  |  | 
|  | err = i915_vma_sync(nop); | 
|  | if (err) { | 
|  | i915_vma_put(nop); | 
|  | i915_vma_put(base); | 
|  | intel_engine_pm_put(engine); | 
|  | break; | 
|  | } | 
|  |  | 
|  | for (i = 0; i < ARRAY_SIZE(cycles); i++) { | 
|  | struct i915_request *rq; | 
|  |  | 
|  | rq = i915_request_create(ce); | 
|  | if (IS_ERR(rq)) { | 
|  | err = PTR_ERR(rq); | 
|  | break; | 
|  | } | 
|  |  | 
|  | err = write_timestamp(rq, 2); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | err = rq->engine->emit_bb_start(rq, | 
|  | base->node.start, 8, | 
|  | 0); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | err = write_timestamp(rq, 3); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | err = rq->engine->emit_bb_start(rq, | 
|  | nop->node.start, | 
|  | nop->node.size, | 
|  | 0); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | err = write_timestamp(rq, 4); | 
|  | if (err) | 
|  | goto out; | 
|  |  | 
|  | out: | 
|  | i915_request_get(rq); | 
|  | i915_request_add(rq); | 
|  |  | 
|  | if (i915_request_wait(rq, 0, HZ / 5) < 0) | 
|  | err = -EIO; | 
|  | i915_request_put(rq); | 
|  | if (err) | 
|  | break; | 
|  |  | 
|  | cycles[i] = | 
|  | (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - | 
|  | (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); | 
|  | } | 
|  | i915_vma_put(nop); | 
|  | i915_vma_put(base); | 
|  | intel_engine_pm_put(engine); | 
|  | if (err) | 
|  | break; | 
|  |  | 
|  | pr_info("%s: 16K MI_NOOP cycles: %u\n", | 
|  | engine->name, trifilter(cycles)); | 
|  | } | 
|  | if (perf_end(gt)) | 
|  | err = -EIO; | 
|  |  | 
|  | return err; | 
|  | } | 
|  |  | 
/*
 * Entry point for the live engine-cs performance selftests. Skips the
 * run entirely if the GT is already wedged (no point timing a dead GPU).
 */
int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}
|  |  | 
/*
 * Validate the static engine mmio_bases tables: for every engine, the
 * per-graphics-version entries must be listed in strictly decreasing
 * version order, each live entry must have a non-zero mmio base, and a
 * zero version terminates the list.
 *
 * NOTE(review): intel_engines/engine_info are presumably visible because
 * this selftest file is #included from the engine-cs implementation —
 * confirm against the including translation unit.
 */
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		/* Sentinel: first entry always compares below U8_MAX. */
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 ver = info->mmio_bases[j].graphics_ver;
			u32 base = info->mmio_bases[j].base;

			/* Entries must be sorted newest-version first. */
			if (ver >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, ver);
				return -EINVAL;
			}

			/* ver == 0 terminates the table. */
			if (ver == 0)
				break;

			/* Every live entry needs a valid mmio base. */
			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, ver, j);
				return -EINVAL;
			}

			prev = ver;
		}

		pr_debug("%s: min graphics version supported for %s%d is %u\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}
|  |  | 
/*
 * Entry point for the mock (no hardware required) engine-cs selftests:
 * currently just the static mmio_bases table validation.
 */
int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}