// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>

#include <linux/vfio_pci_core.h>

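/*
 * The ioreadXX/iowriteXX accessors are little-endian (they byte-swap on
 * big-endian hosts), while the "be" variants never swap.  Selecting the
 * non-swapping variant for each build keeps these helpers pass-through:
 * userspace always sees the device's raw byte order.
 */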
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

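/*
 * Generate vfio_pci_iowriteXX() wrappers.  When test_mem is set (i.e. the
 * region is MMIO), the access is performed under memory_lock and fails
 * with -EIO if the device's memory decode is disabled, so a user cannot
 * trigger accesses to an unprogrammed BAR.  For illustration,
 * VFIO_IOWRITE(32) expands to roughly:
 *
 *	static int vfio_pci_iowrite32(struct vfio_pci_core_device *vdev,
 *				      bool test_mem, u32 val,
 *				      void __iomem *io);
 *
 * The 64-bit variant is only generated where the arch defines iowrite64().
 */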
#define VFIO_IOWRITE(size) \
static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif

#define VFIO_IOREAD(size) \
static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
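
/*
 * No 64-bit read helper is needed: do_io_rw() below splits userspace
 * accesses into chunks of at most four bytes, and the only 8-byte access
 * in this file is the ioeventfd write path.
 */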

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			void __iomem *io, char __user *buf,
			loff_t off, size_t count, size_t x_start,
			size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

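		/*
		 * "fillable" is how much of the request can be satisfied by
		 * real device access before running into the excluded
		 * [x_start, x_end) range; it is zero once inside it.
		 */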
		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

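		/*
		 * Use the widest naturally aligned access the current offset
		 * permits: 4 bytes, then 2, then 1.
		 */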
		if (fillable >= 4 && !(off % 4)) {
			u32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					return -EFAULT;

				ret = vfio_pci_iowrite32(vdev, test_mem,
							 val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread32(vdev, test_mem,
							&val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 4))
					return -EFAULT;
			}

			filled = 4;
		} else if (fillable >= 2 && !(off % 2)) {
			u16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					return -EFAULT;

				ret = vfio_pci_iowrite16(vdev, test_mem,
							 val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread16(vdev, test_mem,
							&val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 2))
					return -EFAULT;
			}

			filled = 2;
		} else if (fillable) {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					return -EFAULT;

				ret = vfio_pci_iowrite8(vdev, test_mem,
							val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread8(vdev, test_mem,
						       &val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 1))
					return -EFAULT;
			}

			filled = 1;
		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}

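/*
 * Lazily request and iomap a BAR on first access; the mapping is cached
 * in vdev->barmap[] and reused until the device is released.
 */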
static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}

ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

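	/*
	 * Determine the accessible extent of the BAR.  A ROM BAR flagged
	 * IORESOURCE_ROM_SHADOW has no resource start, but the legacy
	 * shadow copy is conventionally at most 128KB (0x20000).
	 */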
	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
			count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

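	/*
	 * Legacy VGA resources: the 0xa0000-0xbffff memory window and the
	 * 0x3b0-0x3bb (monochrome) and 0x3c0-0x3df (color/VGA) I/O port
	 * ranges.  An access is clamped to the end of its range rather
	 * than spanning into the next one.
	 */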
	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif

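/*
 * Issue the ioeventfd's pre-configured write.  Callers pass test_mem =
 * false when memory decode has already been validated under memory_lock.
 */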
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_iowrite8(ioeventfd->vdev, test_mem,
				  ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_iowrite16(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_iowrite32(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_pci_iowrite64(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}

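/*
 * Fast path, called from eventfd wakeup where sleeping is not allowed, so
 * only a trylock on memory_lock is safe; returning 1 defers the write to
 * the thread handler below.  Since the memory enable check happens here,
 * the write itself runs with test_mem = false.
 */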
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}

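/*
 * Set up or tear down an eventfd-triggered BAR write: once registered,
 * signalling the eventfd makes the kernel perform the recorded write
 * without a trap-and-emulate round trip through userspace.  fd == -1
 * removes a matching registration.
 *
 * An illustrative userspace sketch using the VFIO_DEVICE_IOEVENTFD ioctl
 * (efd and doorbell_offset are placeholder values; error handling
 * omitted):
 *
 *	struct vfio_region_info reg = {
 *		.argsz = sizeof(reg),
 *		.index = VFIO_PCI_BAR0_REGION_INDEX,
 *	};
 *	struct vfio_device_ioeventfd ioeventfd = {
 *		.argsz	= sizeof(ioeventfd),
 *		.flags	= VFIO_DEVICE_IOEVENTFD_32,
 *		.data	= 1,
 *		.fd	= efd,
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
 *	ioeventfd.offset = reg.offset + doorbell_offset;
 *	ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ioeventfd);
 */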
long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
			uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	/* An existing match is either torn down (fd == -1) or rejected */
	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	/* Nothing to tear down */
	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}