|  | /* | 
|  | * Copyright 2014 Advanced Micro Devices, Inc. | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a | 
|  | * copy of this software and associated documentation files (the "Software"), | 
|  | * to deal in the Software without restriction, including without limitation | 
|  | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
|  | * and/or sell copies of the Software, and to permit persons to whom the | 
|  | * Software is furnished to do so, subject to the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL | 
|  | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | 
|  | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | 
|  | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 
|  | * OTHER DEALINGS IN THE SOFTWARE. | 
|  | * | 
|  | */ | 
|  |  | 
|  | #include <linux/dma-mapping.h> | 
|  |  | 
|  | #include "amdgpu.h" | 
|  | #include "amdgpu_ih.h" | 
|  |  | 
|  | /** | 
|  | * amdgpu_ih_ring_init - initialize the IH state | 
|  | * | 
|  | * @adev: amdgpu_device pointer | 
|  | * @ih: ih ring to initialize | 
|  | * @ring_size: ring size to allocate | 
|  | * @use_bus_addr: true when we can use dma_alloc_coherent | 
|  | * | 
|  | * Initializes the IH state and allocates a buffer | 
|  | * for the IH ring buffer. | 
|  | * Returns 0 for success, errors for failure. | 
|  | */ | 
|  | int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, | 
|  | unsigned ring_size, bool use_bus_addr) | 
|  | { | 
|  | u32 rb_bufsz; | 
|  | int r; | 
|  |  | 
|  | /* Align ring size */ | 
|  | rb_bufsz = order_base_2(ring_size / 4); | 
|  | ring_size = (1 << rb_bufsz) * 4; | 
|  | ih->ring_size = ring_size; | 
|  | ih->ptr_mask = ih->ring_size - 1; | 
|  | ih->rptr = 0; | 
|  | ih->use_bus_addr = use_bus_addr; | 
|  |  | 
|  | if (use_bus_addr) { | 
|  | dma_addr_t dma_addr; | 
|  |  | 
|  | if (ih->ring) | 
|  | return 0; | 
|  |  | 
|  | /* add 8 bytes for the rptr/wptr shadows and | 
|  | * add them to the end of the ring allocation. | 
|  | */ | 
|  | ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8, | 
|  | &dma_addr, GFP_KERNEL); | 
|  | if (ih->ring == NULL) | 
|  | return -ENOMEM; | 
|  |  | 
|  | ih->gpu_addr = dma_addr; | 
|  | ih->wptr_addr = dma_addr + ih->ring_size; | 
|  | ih->wptr_cpu = &ih->ring[ih->ring_size / 4]; | 
|  | ih->rptr_addr = dma_addr + ih->ring_size + 4; | 
|  | ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1]; | 
|  | } else { | 
|  | unsigned wptr_offs, rptr_offs; | 
|  |  | 
|  | r = amdgpu_device_wb_get(adev, &wptr_offs); | 
|  | if (r) | 
|  | return r; | 
|  |  | 
|  | r = amdgpu_device_wb_get(adev, &rptr_offs); | 
|  | if (r) { | 
|  | amdgpu_device_wb_free(adev, wptr_offs); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE, | 
|  | AMDGPU_GEM_DOMAIN_GTT, | 
|  | &ih->ring_obj, &ih->gpu_addr, | 
|  | (void **)&ih->ring); | 
|  | if (r) { | 
|  | amdgpu_device_wb_free(adev, rptr_offs); | 
|  | amdgpu_device_wb_free(adev, wptr_offs); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4; | 
|  | ih->wptr_cpu = &adev->wb.wb[wptr_offs]; | 
|  | ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4; | 
|  | ih->rptr_cpu = &adev->wb.wb[rptr_offs]; | 
|  | } | 
|  |  | 
|  | init_waitqueue_head(&ih->wait_process); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * amdgpu_ih_ring_fini - tear down the IH state | 
|  | * | 
|  | * @adev: amdgpu_device pointer | 
|  | * @ih: ih ring to tear down | 
|  | * | 
|  | * Tears down the IH state and frees buffer | 
|  | * used for the IH ring buffer. | 
|  | */ | 
|  | void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) | 
|  | { | 
|  |  | 
|  | if (!ih->ring) | 
|  | return; | 
|  |  | 
|  | if (ih->use_bus_addr) { | 
|  |  | 
|  | /* add 8 bytes for the rptr/wptr shadows and | 
|  | * add them to the end of the ring allocation. | 
|  | */ | 
|  | dma_free_coherent(adev->dev, ih->ring_size + 8, | 
|  | (void *)ih->ring, ih->gpu_addr); | 
|  | ih->ring = NULL; | 
|  | } else { | 
|  | amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr, | 
|  | (void **)&ih->ring); | 
|  | amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4); | 
|  | amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4); | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * amdgpu_ih_ring_write - write IV to the ring buffer | 
|  | * | 
|  | * @ih: ih ring to write to | 
|  | * @iv: the iv to write | 
|  | * @num_dw: size of the iv in dw | 
|  | * | 
|  | * Writes an IV to the ring buffer using the CPU and increment the wptr. | 
|  | * Used for testing and delegating IVs to a software ring. | 
|  | */ | 
|  | void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, | 
|  | unsigned int num_dw) | 
|  | { | 
|  | uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2; | 
|  | unsigned int i; | 
|  |  | 
|  | for (i = 0; i < num_dw; ++i) | 
|  | ih->ring[wptr++] = cpu_to_le32(iv[i]); | 
|  |  | 
|  | wptr <<= 2; | 
|  | wptr &= ih->ptr_mask; | 
|  |  | 
|  | /* Only commit the new wptr if we don't overflow */ | 
|  | if (wptr != READ_ONCE(ih->rptr)) { | 
|  | wmb(); | 
|  | WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr)); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Waiter helper that checks current rptr matches or passes checkpoint wptr */ | 
|  | static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, | 
|  | struct amdgpu_ih_ring *ih, | 
|  | uint32_t checkpoint_wptr, | 
|  | uint32_t *prev_rptr) | 
|  | { | 
|  | uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask); | 
|  |  | 
|  | /* rptr has wrapped. */ | 
|  | if (cur_rptr < *prev_rptr) | 
|  | cur_rptr += ih->ptr_mask + 1; | 
|  | *prev_rptr = cur_rptr; | 
|  |  | 
|  | /* check ring is empty to workaround missing wptr overflow flag */ | 
|  | return cur_rptr >= checkpoint_wptr || | 
|  | (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint | 
|  | * | 
|  | * @adev: amdgpu_device pointer | 
|  | * @ih: ih ring to process | 
|  | * | 
|  | * Used to ensure ring has processed IVs up to the checkpoint write pointer. | 
|  | */ | 
|  | int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, | 
|  | struct amdgpu_ih_ring *ih) | 
|  | { | 
|  | uint32_t checkpoint_wptr, rptr; | 
|  |  | 
|  | if (!ih->enabled || adev->shutdown) | 
|  | return -ENODEV; | 
|  |  | 
|  | checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); | 
|  | /* Order wptr with rptr. */ | 
|  | rmb(); | 
|  | rptr = READ_ONCE(ih->rptr); | 
|  |  | 
|  | /* wptr has wrapped. */ | 
|  | if (rptr > checkpoint_wptr) | 
|  | checkpoint_wptr += ih->ptr_mask + 1; | 
|  |  | 
|  | return wait_event_interruptible(ih->wait_process, | 
|  | amdgpu_ih_has_checkpoint_processed(adev, ih, | 
|  | checkpoint_wptr, &rptr)); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * amdgpu_ih_process - interrupt handler | 
|  | * | 
|  | * @adev: amdgpu_device pointer | 
|  | * @ih: ih ring to process | 
|  | * | 
|  | * Interrupt hander (VI), walk the IH ring. | 
|  | * Returns irq process return code. | 
|  | */ | 
|  | int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) | 
|  | { | 
|  | unsigned int count; | 
|  | u32 wptr; | 
|  |  | 
|  | if (!ih->enabled || adev->shutdown) | 
|  | return IRQ_NONE; | 
|  |  | 
|  | wptr = amdgpu_ih_get_wptr(adev, ih); | 
|  |  | 
|  | restart_ih: | 
|  | count  = AMDGPU_IH_MAX_NUM_IVS; | 
|  | DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); | 
|  |  | 
|  | /* Order reading of wptr vs. reading of IH ring data */ | 
|  | rmb(); | 
|  |  | 
|  | while (ih->rptr != wptr && --count) { | 
|  | amdgpu_irq_dispatch(adev, ih); | 
|  | ih->rptr &= ih->ptr_mask; | 
|  | } | 
|  |  | 
|  | amdgpu_ih_set_rptr(adev, ih); | 
|  | wake_up_all(&ih->wait_process); | 
|  |  | 
|  | /* make sure wptr hasn't changed while processing */ | 
|  | wptr = amdgpu_ih_get_wptr(adev, ih); | 
|  | if (wptr != ih->rptr) | 
|  | goto restart_ih; | 
|  |  | 
|  | return IRQ_HANDLED; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * amdgpu_ih_decode_iv_helper - decode an interrupt vector | 
|  | * | 
|  | * @adev: amdgpu_device pointer | 
|  | * @ih: ih ring to process | 
|  | * @entry: IV entry | 
|  | * | 
|  | * Decodes the interrupt vector at the current rptr | 
|  | * position and also advance the position for for Vega10 | 
|  | * and later GPUs. | 
|  | */ | 
|  | void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, | 
|  | struct amdgpu_ih_ring *ih, | 
|  | struct amdgpu_iv_entry *entry) | 
|  | { | 
|  | /* wptr/rptr are in bytes! */ | 
|  | u32 ring_index = ih->rptr >> 2; | 
|  | uint32_t dw[8]; | 
|  |  | 
|  | dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); | 
|  | dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); | 
|  | dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); | 
|  | dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); | 
|  | dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); | 
|  | dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); | 
|  | dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); | 
|  | dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); | 
|  |  | 
|  | entry->client_id = dw[0] & 0xff; | 
|  | entry->src_id = (dw[0] >> 8) & 0xff; | 
|  | entry->ring_id = (dw[0] >> 16) & 0xff; | 
|  | entry->vmid = (dw[0] >> 24) & 0xf; | 
|  | entry->vmid_src = (dw[0] >> 31); | 
|  | entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); | 
|  | entry->timestamp_src = dw[2] >> 31; | 
|  | entry->pasid = dw[3] & 0xffff; | 
|  | entry->pasid_src = dw[3] >> 31; | 
|  | entry->src_data[0] = dw[4]; | 
|  | entry->src_data[1] = dw[5]; | 
|  | entry->src_data[2] = dw[6]; | 
|  | entry->src_data[3] = dw[7]; | 
|  |  | 
|  | /* wptr/rptr are in bytes! */ | 
|  | ih->rptr += 32; | 
|  | } |