merge-gve-1.1.0 from branch/tag: gve-1.1.0 into branch: cos-4.19
Changelog:
-------------------------------------------------------------
gve: Bump version to 1.1.0.
gve: Move the irq db indexes out of the ntfy block struct
gve: Add rx buffer pagecnt bias
gve: Add netif_set_xps_queue call
gve: Fixes race between gve_open and gve_init_prive.
gve: Add support for raw addressing to the rx path
gve: Add support for raw addressing in the tx path
gve: Add support for raw addressing device option
gve: Fixes memory barrier to make sure we have synchronized the seq no with the device.
gve: Enable Link Speed Reporting in the driver.
gve: Fix max Tx/Rx queues in gve_probe.
gve: Use link status register to report link status
gve: Batch AQ commands for creating and destroying queues.
gve: Fix gve flags enum. The flag bits were not used as expected.
gve: fix -ENOMEM null check on a page allocation
gve: Add Gvnic stats AQ command and ethtool show/set-priv-flags.
gve: Add support for dma_mask register
gve: Fix freeing DMA memory with different size when describe device.
gve: Add stats for gve.
gve: Get and set Rx copybreak via ethtool
BUG=b/156745396
SOURCE=GVNIC
TEST=tryjob
Change-Id: Ib798a7357d4867868045975885fe54ecfb6c67bb
Reviewed-on: https://cos-review.googlesource.com/c/third_party/kernel/+/1660
Reviewed-by: Roy Yang <royyang@google.com>
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 5fe24fa..e0f6142 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -31,6 +31,17 @@
/* 1 for management, 1 for rx, 1 for tx */
#define GVE_MIN_MSIX 3
+/* Numbers of gve tx/rx stats in stats report. */
+#define GVE_TX_STATS_REPORT_NUM 5
+#define GVE_RX_STATS_REPORT_NUM 2
+
+/* Numbers of NIC tx/rx stats in stats report. */
+#define NIC_TX_STATS_REPORT_NUM 0
+#define NIC_RX_STATS_REPORT_NUM 4
+
+/* Interval to schedule a service task, 20000ms. */
+#define GVE_SERVICE_TIMER_PERIOD 20000
+
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -43,6 +54,8 @@
struct page *page;
void *page_address;
u32 page_offset; /* offset to write to in page */
+ int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
+ bool can_flip; /* page can be flipped and reused */
};
/* A list of pages registered with the device during setup and used by a queue
@@ -61,6 +74,7 @@
dma_addr_t data_bus; /* dma mapping of the slots */
struct gve_rx_slot_page_info *page_info; /* page info of the buffers */
struct gve_queue_page_list *qpl; /* qpl assigned to this queue */
+ bool raw_addressing; /* use raw_addressing? */
};
struct gve_priv;
@@ -75,6 +89,13 @@
u32 cnt; /* free-running total number of completed packets */
u32 fill_cnt; /* free-running total number of descs and buffs posted */
u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+ u32 db_threshold; /* threshold for posting new buffs and descs */
+ u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
+ u64 rx_copied_pkt; /* free-running total number of copied packets */
+ u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
+ u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */
+ u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */
+ u64 rx_no_refill_dropped_pkt; /* free-running count of packets dropped because of lack of buffer refill */
u32 q_num; /* queue index */
u32 ntfy_id; /* notification block index */
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
@@ -95,12 +116,20 @@
u32 iov_padding; /* padding associated with this segment */
};
+struct gve_tx_dma_buf {
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+};
+
/* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
* ring entry but only used for a pkt_desc not a seg_desc
*/
struct gve_tx_buffer_state {
struct sk_buff *skb; /* skb for this pkt */
- struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+ union {
+ struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+ struct gve_tx_dma_buf buf;
+ };
};
/* A TX buffer - each queue has one */
@@ -123,13 +152,16 @@
__be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
u64 pkt_done; /* free-running - total packets completed */
u64 bytes_done; /* free-running - total bytes completed */
+ u32 dropped_pkt; /* free-running - total packets dropped */
/* Cacheline 2 -- Read-mostly fields */
union gve_tx_desc *desc ____cacheline_aligned;
struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
struct netdev_queue *netdev_txq;
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+ struct device *dev;
u32 mask; /* masks req and done down to queue size */
+ bool raw_addressing; /* use raw_addressing? */
/* Slow-path fields */
u32 q_num ____cacheline_aligned; /* queue idx */
@@ -145,13 +177,13 @@
* associated with that irq.
*/
struct gve_notify_block {
- __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
+ __be32 *irq_db_index; /* pointer to idx into Bar2 */
char name[IFNAMSIZ + 16]; /* name registered with the kernel */
struct napi_struct napi; /* kernel napi struct for this block */
struct gve_priv *priv;
struct gve_tx_ring *tx; /* tx rings on this block */
struct gve_rx_ring *rx; /* rx rings on this block */
-} ____cacheline_aligned;
+};
/* Tracks allowed and current queue settings */
struct gve_queue_config {
@@ -165,13 +197,18 @@
unsigned long *qpl_id_map; /* bitmap of used qpl ids */
};
+struct gve_irq_db {
+ __be32 index;
+} ____cacheline_aligned;
+
struct gve_priv {
struct net_device *dev;
struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
struct gve_queue_page_list *qpls; /* array of num qpls */
struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
- dma_addr_t ntfy_block_bus;
+ struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
+ dma_addr_t irq_db_indices_bus;
struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
char mgmt_msix_name[IFNAMSIZ + 16];
u32 mgmt_msix_idx;
@@ -182,11 +219,12 @@
u16 tx_desc_cnt; /* num desc per ring */
u16 rx_desc_cnt; /* num desc per ring */
u16 tx_pages_per_qpl; /* tx buffer length */
- u16 rx_pages_per_qpl; /* rx buffer length */
+ u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
+ bool raw_addressing; /* true if this dev supports raw addressing */
struct gve_queue_config tx_cfg;
struct gve_queue_config rx_cfg;
@@ -206,27 +244,67 @@
dma_addr_t adminq_bus_addr;
u32 adminq_mask; /* masks prod_cnt to adminq size */
u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
+ u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */
+ u32 adminq_timeouts; /* free-running count of AQ cmds timeouts */
+ /* free-running count of per AQ cmd executed */
+ u32 adminq_describe_device_cnt;
+ u32 adminq_cfg_device_resources_cnt;
+ u32 adminq_register_page_list_cnt;
+ u32 adminq_unregister_page_list_cnt;
+ u32 adminq_create_tx_queue_cnt;
+ u32 adminq_create_rx_queue_cnt;
+ u32 adminq_destroy_tx_queue_cnt;
+ u32 adminq_destroy_rx_queue_cnt;
+ u32 adminq_dcfg_device_resources_cnt;
+ u32 adminq_set_driver_parameter_cnt;
+ u32 adminq_report_stats_cnt;
+
+ /* Global stats */
+ u32 interface_up_cnt; /* count of times interface turned up */
+ u32 interface_down_cnt; /* count of times interface turned down */
+ u32 reset_cnt; /* count of reset */
+ u32 page_alloc_fail; /* count of page alloc fails */
+ u32 dma_mapping_error; /* count of dma mapping errors */
struct workqueue_struct *gve_wq;
struct work_struct service_task;
unsigned long service_task_flags;
unsigned long state_flags;
+
+ struct gve_stats_report *stats_report;
+ u64 stats_report_len;
+ dma_addr_t stats_report_bus; /* dma address for the stats report */
+ unsigned long ethtool_flags;
+
+ unsigned long service_timer_period;
+ struct timer_list service_timer;
+
+ /* Gvnic device's dma mask, set during probe. */
+ u8 dma_mask;
+
+ /* Gvnic device link speed from hypervisor. */
+ u64 link_speed;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0))
int max_mtu;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
};
-enum gve_service_task_flags {
- GVE_PRIV_FLAGS_DO_RESET = BIT(1),
- GVE_PRIV_FLAGS_RESET_IN_PROGRESS = BIT(2),
- GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = BIT(3),
+enum gve_service_task_flags_bit {
+ GVE_PRIV_FLAGS_DO_RESET = 1,
+ GVE_PRIV_FLAGS_RESET_IN_PROGRESS = 2,
+ GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = 3,
+ GVE_PRIV_FLAGS_DO_REPORT_STATS = 4,
};
-enum gve_state_flags {
- GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = BIT(1),
- GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = BIT(2),
- GVE_PRIV_FLAGS_DEVICE_RINGS_OK = BIT(3),
- GVE_PRIV_FLAGS_NAPI_ENABLED = BIT(4),
+enum gve_state_flags_bit {
+ GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = 1,
+ GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = 2,
+ GVE_PRIV_FLAGS_DEVICE_RINGS_OK = 3,
+ GVE_PRIV_FLAGS_NAPI_ENABLED = 4,
+};
+
+enum gve_ethtool_flags_bit {
+ GVE_PRIV_FLAGS_REPORT_STATS = 0,
};
static inline bool gve_get_do_reset(struct gve_priv *priv)
@@ -276,6 +354,22 @@
clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags);
}
+static inline bool gve_get_do_report_stats(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS,
+ &priv->service_task_flags);
+}
+
+static inline void gve_set_do_report_stats(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, &priv->service_task_flags);
+}
+
+static inline void gve_clear_do_report_stats(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, &priv->service_task_flags);
+}
+
static inline bool gve_get_admin_queue_ok(struct gve_priv *priv)
{
return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
@@ -336,12 +430,27 @@
clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
}
+static inline bool gve_get_report_stats(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags);
+}
+
+static inline void gve_set_report_stats(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags);
+}
+
+static inline void gve_clear_report_stats(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags);
+}
+
/* Returns the address of the ntfy_blocks irq doorbell
*/
static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
struct gve_notify_block *block)
{
- return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
+ return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)];
}
/* Returns the index into ntfy_blocks of the given tx ring's block
@@ -362,14 +471,22 @@
*/
static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
{
- return priv->tx_cfg.num_queues;
+ if (priv->raw_addressing) {
+ return 0;
+ } else {
+ return priv->tx_cfg.num_queues;
+ }
}
/* Returns the number of rx queue page lists
*/
static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
{
- return priv->rx_cfg.num_queues;
+ if (priv->raw_addressing) {
+ return 0;
+ } else {
+ return priv->rx_cfg.num_queues;
+ }
}
/* Returns a pointer to the next available tx qpl in the list of qpls
@@ -423,24 +540,10 @@
return DMA_FROM_DEVICE;
}
-/* Returns true if the max mtu allows page recycling */
-static inline bool gve_can_recycle_pages(struct net_device *dev)
-{
- /* We can't recycle the pages if we can't fit a packet into half a
- * page.
- */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0))
- struct gve_priv *priv = netdev_priv(dev);
-
- return priv->max_mtu <= PAGE_SIZE / 2;
-#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
- return dev->max_mtu <= PAGE_SIZE / 2;
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
-}
-
/* buffers */
-int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
- enum dma_data_direction);
+int gve_alloc_page(struct gve_priv *priv, struct device *dev,
+ struct page **page, dma_addr_t *dma,
+ enum dma_data_direction, gfp_t gfp_flags);
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
/* tx handling */
@@ -463,6 +566,8 @@
int gve_adjust_queues(struct gve_priv *priv,
struct gve_queue_config new_rx_config,
struct gve_queue_config new_tx_config);
+/* report stats handling */
+void gve_handle_report_stats(struct gve_priv *priv);
/* exported by ethtool.c */
extern const struct ethtool_ops gve_ethtool_ops;
/* needed by ethtool */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 5544194..052b6b8 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -24,6 +24,19 @@
priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1;
priv->adminq_prod_cnt = 0;
+ priv->adminq_cmd_fail = 0;
+ priv->adminq_timeouts = 0;
+ priv->adminq_describe_device_cnt = 0;
+ priv->adminq_cfg_device_resources_cnt = 0;
+ priv->adminq_register_page_list_cnt = 0;
+ priv->adminq_unregister_page_list_cnt = 0;
+ priv->adminq_create_tx_queue_cnt = 0;
+ priv->adminq_create_rx_queue_cnt = 0;
+ priv->adminq_destroy_tx_queue_cnt = 0;
+ priv->adminq_destroy_rx_queue_cnt = 0;
+ priv->adminq_dcfg_device_resources_cnt = 0;
+ priv->adminq_set_driver_parameter_cnt = 0;
+ priv->adminq_report_stats_cnt = 0;
/* Setup Admin queue with the device */
iowrite32be(priv->adminq_bus_addr / PAGE_SIZE,
@@ -82,17 +95,18 @@
return false;
}
-static int gve_adminq_parse_err(struct device *dev, u32 status)
+static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
{
if (status != GVE_ADMINQ_COMMAND_PASSED &&
- status != GVE_ADMINQ_COMMAND_UNSET)
- dev_err(dev, "AQ command failed with status %d\n", status);
-
+ status != GVE_ADMINQ_COMMAND_UNSET) {
+ dev_err(&priv->pdev->dev, "AQ command failed with status %d\n", status);
+ priv->adminq_cmd_fail++;
+ }
switch (status) {
case GVE_ADMINQ_COMMAND_PASSED:
return 0;
case GVE_ADMINQ_COMMAND_UNSET:
- dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
+ dev_err(&priv->pdev->dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
return -EINVAL;
case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
@@ -117,42 +131,144 @@
case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
return -ENOTSUPP;
default:
- dev_err(dev, "parse_aq_err: unknown status code %d\n", status);
+ dev_err(&priv->pdev->dev, "parse_aq_err: unknown status code %d\n", status);
return -EINVAL;
}
}
+/* Flushes all AQ commands currently queued and waits for them to complete.
+ * If there are failures, it will return the first error.
+ */
+static int gve_adminq_kick_and_wait(struct gve_priv *priv)
+{
+ u32 tail, head;
+ int i;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+ head = priv->adminq_prod_cnt;
+
+ gve_adminq_kick_cmd(priv, head);
+ if (!gve_adminq_wait_for_cmd(priv, head)) {
+ dev_err(&priv->pdev->dev, "AQ commands timed out, need to reset AQ\n");
+ priv->adminq_timeouts++;
+ return -ENOTRECOVERABLE;
+ }
+
+ for (i = tail; i < head; i++) {
+ union gve_adminq_command *cmd;
+ u32 status, err;
+
+ cmd = &priv->adminq[i & priv->adminq_mask];
+ status = be32_to_cpu(READ_ONCE(cmd->status));
+ err = gve_adminq_parse_err(priv, status);
+ if (err)
+ // Return the first error if we failed.
+ return err;
+ }
+
+ return 0;
+}
+
/* This function is not threadsafe - the caller is responsible for any
* necessary locks.
*/
-int gve_adminq_execute_cmd(struct gve_priv *priv,
- union gve_adminq_command *cmd_orig)
+static int gve_adminq_issue_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd_orig)
{
union gve_adminq_command *cmd;
- u32 status = 0;
- u32 prod_cnt;
+ u32 tail;
+ u32 opcode;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+
+ // Check if next command will overflow the buffer.
+ if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) {
+ int err;
+
+ // Flush existing commands to make room.
+ err = gve_adminq_kick_and_wait(priv);
+ if (err)
+ return err;
+
+ // Retry.
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+ if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) {
+ // This should never happen. We just flushed the
+ // command queue so there should be enough space.
+ return -ENOMEM;
+ }
+ }
cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
priv->adminq_prod_cnt++;
- prod_cnt = priv->adminq_prod_cnt;
memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
+ opcode = be32_to_cpu(READ_ONCE(cmd->opcode));
- gve_adminq_kick_cmd(priv, prod_cnt);
- if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) {
- dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n");
- return -ENOTRECOVERABLE;
+ switch(opcode) {
+ case GVE_ADMINQ_DESCRIBE_DEVICE:
+ priv->adminq_describe_device_cnt++;
+ break;
+ case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES:
+ priv->adminq_cfg_device_resources_cnt++;
+ break;
+ case GVE_ADMINQ_REGISTER_PAGE_LIST:
+ priv->adminq_register_page_list_cnt++;
+ break;
+ case GVE_ADMINQ_UNREGISTER_PAGE_LIST:
+ priv->adminq_unregister_page_list_cnt++;
+ break;
+ case GVE_ADMINQ_CREATE_TX_QUEUE:
+ priv->adminq_create_tx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_CREATE_RX_QUEUE:
+ priv->adminq_create_rx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_DESTROY_TX_QUEUE:
+ priv->adminq_destroy_tx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_DESTROY_RX_QUEUE:
+ priv->adminq_destroy_rx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES:
+ priv->adminq_dcfg_device_resources_cnt++;
+ break;
+ case GVE_ADMINQ_SET_DRIVER_PARAMETER:
+ priv->adminq_set_driver_parameter_cnt++;
+ break;
+ case GVE_ADMINQ_REPORT_STATS:
+ priv->adminq_report_stats_cnt++;
+ break;
+ default:
+ dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
}
- memcpy(cmd_orig, cmd, sizeof(*cmd));
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,20,0)
- status = be32_to_cpu(READ_ONCE(cmd->status));
-#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3,20,0) */
- status = be32_to_cpu(ACCESS_ONCE(cmd->status));
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,20,0) */
- return gve_adminq_parse_err(&priv->pdev->dev, status);
+ return 0;
}
+/* This function is not threadsafe - the caller is responsible for any
+ * necessary locks.
+ * The caller is also responsible for making sure there are no commands
+ * waiting to be executed.
+ */
+static int gve_adminq_execute_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd_orig)
+{
+ u32 tail, head;
+ int err;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+ head = priv->adminq_prod_cnt;
+ if (tail != head)
+ // This is not a valid path
+ return -EINVAL;
+
+ err = gve_adminq_issue_cmd(priv, cmd_orig);
+ if (err)
+ return err;
+
+ return gve_adminq_kick_and_wait(priv);
+}
/* The device specifies that the management vector can either be the first irq
* or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
* the ntfy blks. It if is 0 then the management vector is last, if it is 1 then
@@ -177,7 +293,7 @@
.num_counters = cpu_to_be32(num_counters),
.irq_db_addr = cpu_to_be64(db_array_bus_addr),
.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
- .irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
+ .irq_db_stride = cpu_to_be32(sizeof(*priv->irq_db_indices)),
.ntfy_blk_msix_base_idx =
cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
};
@@ -195,80 +311,120 @@
return gve_adminq_execute_cmd(priv, &cmd);
}
-int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
{
- struct gve_tx_ring *tx = &priv->tx[queue_index];
union gve_adminq_command cmd;
+ struct gve_tx_ring *tx;
+ u32 qpl_id;
+ int err;
+ int i;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
- cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
- .queue_id = cpu_to_be32(queue_index),
- .reserved = 0,
- .queue_resources_addr = cpu_to_be64(tx->q_resources_bus),
- .tx_ring_addr = cpu_to_be64(tx->bus),
- .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id),
- .ntfy_id = cpu_to_be32(tx->ntfy_id),
- };
+ for (i = 0; i < num_queues; i++) {
+ tx = &priv->tx[i];
+ qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID :
+ tx->tx_fifo.qpl->id;
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
+ cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
+ .queue_id = cpu_to_be32(i),
+ .reserved = 0,
+ .queue_resources_addr =
+ cpu_to_be64(tx->q_resources_bus),
+ .tx_ring_addr = cpu_to_be64(tx->bus),
+ .queue_page_list_id = cpu_to_be32(qpl_id),
+ .ntfy_id = cpu_to_be32(tx->ntfy_id),
+ };
+ err = gve_adminq_issue_cmd(priv, &cmd);
+ if (err)
+ return err;
+ }
- return gve_adminq_execute_cmd(priv, &cmd);
+ return gve_adminq_kick_and_wait(priv);
}
-int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
{
- struct gve_rx_ring *rx = &priv->rx[queue_index];
union gve_adminq_command cmd;
+ struct gve_rx_ring *rx;
+ u32 qpl_id;
+ int err;
+ int i;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
- cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
- .queue_id = cpu_to_be32(queue_index),
- .index = cpu_to_be32(queue_index),
- .reserved = 0,
- .ntfy_id = cpu_to_be32(rx->ntfy_id),
- .queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
- .rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
- .rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
- .queue_page_list_id = cpu_to_be32(rx->data.qpl->id),
- };
+ for (i = 0; i < num_queues; i++) {
+ rx = &priv->rx[i];
+ qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID :
+ rx->data.qpl->id;
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+ cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+ .queue_id = cpu_to_be32(i),
+ .index = cpu_to_be32(i),
+ .reserved = 0,
+ .ntfy_id = cpu_to_be32(rx->ntfy_id),
+ .queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
+ .rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
+ .rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
+ .queue_page_list_id = cpu_to_be32(qpl_id),
+ };
+ err = gve_adminq_issue_cmd(priv, &cmd);
+ if (err)
+ return err;
+ }
- return gve_adminq_execute_cmd(priv, &cmd);
+ return gve_adminq_kick_and_wait(priv);
}
-int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
{
union gve_adminq_command cmd;
+ int err;
+ int i;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
- cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
- .queue_id = cpu_to_be32(queue_index),
- };
+ for (i = 0; i < num_queues; i++) {
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
+ cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
+ .queue_id = cpu_to_be32(i),
+ };
+ err = gve_adminq_issue_cmd(priv, &cmd);
+ if (err)
+ return err;
+ }
- return gve_adminq_execute_cmd(priv, &cmd);
+ return gve_adminq_kick_and_wait(priv);
}
-int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
{
union gve_adminq_command cmd;
+ int err;
+ int i;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
- cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
- .queue_id = cpu_to_be32(queue_index),
- };
+ for (i = 0; i < num_queues; i++) {
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+ cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+ .queue_id = cpu_to_be32(i),
+ };
+ err = gve_adminq_issue_cmd(priv, &cmd);
+ if (err)
+ return err;
+ }
- return gve_adminq_execute_cmd(priv, &cmd);
+ return gve_adminq_kick_and_wait(priv);
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
struct gve_device_descriptor *descriptor;
+ struct gve_device_option *dev_opt;
union gve_adminq_command cmd;
dma_addr_t descriptor_bus;
+ u16 num_options;
int err = 0;
u8 *mac;
u16 mtu;
+ int i;
memset(&cmd, 0, sizeof(cmd));
descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE,
@@ -288,9 +444,9 @@
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
- netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n",
- priv->tx_desc_cnt);
- err = -EINVAL;
+ dev_err(&priv->pdev->dev, "Tx desc count %d too low\n",
+ priv->tx_desc_cnt);
+ err = -EINVAL;
goto free_device_descriptor;
}
priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
@@ -298,8 +454,10 @@
< PAGE_SIZE ||
priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0])
< PAGE_SIZE) {
- netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n",
- priv->rx_desc_cnt);
+ dev_err(&priv->pdev->dev, "Rx desc count %d too low\n",
+ priv->rx_desc_cnt);
+ err = -EINVAL;
+
err = -EINVAL;
goto free_device_descriptor;
}
@@ -307,8 +465,9 @@
be64_to_cpu(descriptor->max_registered_pages);
mtu = be16_to_cpu(descriptor->mtu);
if (mtu < ETH_MIN_MTU) {
- netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n",
- mtu);
+ dev_err(&priv->pdev->dev, "MTU %d below minimum MTU\n", mtu);
+ err = -EINVAL;
+
err = -EINVAL;
goto free_device_descriptor;
}
@@ -324,18 +483,64 @@
memcpy(priv->dev->dev_addr, descriptor->mac, ETH_ALEN);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) */
mac = descriptor->mac;
- netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac);
+ dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
- priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl);
- if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) {
- netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
- priv->rx_pages_per_qpl);
- priv->rx_desc_cnt = priv->rx_pages_per_qpl;
+ priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
+ if (priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
+ dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
+ priv->rx_data_slot_cnt);
+ priv->rx_desc_cnt = priv->rx_data_slot_cnt;
}
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
+ dev_opt = (struct gve_device_option *)((void *)descriptor +
+ sizeof(*descriptor));
+
+ num_options = be16_to_cpu(descriptor->num_device_options);
+ for (i = 0; i < num_options; i++) {
+ u16 option_id;
+ u16 option_length;
+
+ if ((void *)dev_opt + sizeof(*dev_opt) > (void *)descriptor +
+ be16_to_cpu(descriptor->total_length)) {
+ dev_err(&priv->dev->dev,
+ "num_options in device_descriptor does not match total length.\n");
+ err = -EINVAL;
+ goto free_device_descriptor;
+ }
+
+ option_id = be16_to_cpu(dev_opt->option_id);
+ option_length = be16_to_cpu(dev_opt->option_length);
+ switch(option_id) {
+ case GVE_DEV_OPT_ID_RAW_ADDRESSING:
+ /* If the length or feature mask doesn't match,
+ * continue without enabling the feature.
+ */
+ if (option_length != GVE_DEV_OPT_LEN_RAW_ADDRESSING ||
+ be32_to_cpu(dev_opt->feat_mask) !=
+ GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING) {
+ dev_info(&priv->pdev->dev,
+ "Raw addressing device option not enabled, length or features mask did not match expected.\n");
+ priv->raw_addressing = false;
+ } else {
+ dev_info(&priv->pdev->dev,
+ "Raw addressing device option enabled.\n");
+ priv->raw_addressing = true;
+ }
+ break;
+ default:
+ /* If we don't recognize the option just continue
+ * without doing anything.
+ */
+ dev_info(&priv->pdev->dev,
+ "Unrecognized device option 0x%hx not enabled.\n",
+ option_id);
+ break;
+ }
+ dev_opt = (void *)dev_opt + sizeof(*dev_opt) + option_length;
+ }
free_device_descriptor:
- dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
+ dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
descriptor_bus);
return err;
}
@@ -398,3 +603,45 @@
return gve_adminq_execute_cmd(priv, &cmd);
}
+
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+ dma_addr_t stats_report_addr, u64 interval)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_STATS);
+ cmd.report_stats = (struct gve_adminq_report_stats) {
+ .stats_report_len = cpu_to_be64(stats_report_len),
+ .stats_report_addr = cpu_to_be64(stats_report_addr),
+ .interval = cpu_to_be64(interval),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_report_link_speed(struct gve_priv *priv)
+{
+ union gve_adminq_command gvnic_cmd;
+ dma_addr_t link_speed_region_bus;
+ u64* link_speed_region;
+ int err;
+
+ link_speed_region = dma_alloc_coherent(&priv->pdev->dev,
+ sizeof(*link_speed_region), &link_speed_region_bus, GFP_KERNEL);
+
+ if (!link_speed_region)
+ return -ENOMEM;
+
+ memset(&gvnic_cmd, 0, sizeof(gvnic_cmd));
+ gvnic_cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_LINK_SPEED);
+ gvnic_cmd.report_link_speed.link_speed_address =
+ cpu_to_be64(link_speed_region_bus);
+
+ err = gve_adminq_execute_cmd(priv, &gvnic_cmd);
+
+ priv->link_speed = be64_to_cpu(*link_speed_region);
+ dma_free_coherent(&priv->pdev->dev, sizeof(*link_speed_region),
+ link_speed_region, link_speed_region_bus);
+ return err;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 4fa9771..0b12486 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -25,6 +25,8 @@
GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
+ GVE_ADMINQ_REPORT_STATS = 0xC,
+ GVE_ADMINQ_REPORT_LINK_SPEED = 0xD
};
/* Admin queue status codes */
@@ -81,12 +83,17 @@
static_assert(sizeof(struct gve_device_descriptor) == 40);
-struct device_option {
- __be32 option_id;
- __be32 option_length;
+struct gve_device_option {
+ __be16 option_id;
+ __be16 option_length;
+ __be32 feat_mask;
};
-static_assert(sizeof(struct device_option) == 8);
+static_assert(sizeof(struct gve_device_option) == 8);
+
+#define GVE_DEV_OPT_ID_RAW_ADDRESSING 0x1
+#define GVE_DEV_OPT_LEN_RAW_ADDRESSING 0x0
+#define GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING 0x0
struct gve_adminq_configure_device_resources {
__be64 counter_array;
@@ -113,6 +120,8 @@
static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4);
+#define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF
+
struct gve_adminq_create_tx_queue {
__be32 queue_id;
__be32 reserved;
@@ -176,6 +185,51 @@
static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16);
+struct gve_adminq_report_stats {
+ __be64 stats_report_len;
+ __be64 stats_report_addr;
+ __be64 interval;
+};
+
+static_assert(sizeof(struct gve_adminq_report_stats) == 24);
+
+struct gve_adminq_report_link_speed {
+ __be64 link_speed_address;
+};
+
+static_assert(sizeof(struct gve_adminq_report_link_speed) == 8);
+
+struct stats {
+ __be32 stat_name;
+ __be32 queue_id;
+ __be64 value;
+};
+
+static_assert(sizeof(struct stats) == 16);
+
+struct gve_stats_report {
+ __be64 written_count;
+ struct stats stats[0];
+};
+
+static_assert(sizeof(struct gve_stats_report) == 8);
+
+enum gve_stat_names {
+ // stats from gve
+ TX_WAKE_CNT = 1,
+ TX_STOP_CNT = 2,
+ TX_FRAMES_SENT = 3,
+ TX_BYTES_SENT = 4,
+ TX_LAST_COMPLETION_PROCESSED = 5,
+ RX_NEXT_EXPECTED_SEQUENCE = 6,
+ RX_BUFFERS_POSTED = 7,
+ // stats from NIC
+ RX_QUEUE_DROP_CNT = 65,
+ RX_NO_BUFFERS_POSTED = 66,
+ RX_DROPS_PACKET_OVER_MRU = 67,
+ RX_DROPS_INVALID_CHECKSUM = 68,
+};
+
union gve_adminq_command {
struct {
__be32 opcode;
@@ -191,6 +245,8 @@
struct gve_adminq_register_page_list reg_page_list;
struct gve_adminq_unregister_page_list unreg_page_list;
struct gve_adminq_set_driver_parameter set_driver_param;
+ struct gve_adminq_report_stats report_stats;
+ struct gve_adminq_report_link_speed report_link_speed;
};
};
u8 reserved[64];
@@ -201,8 +257,6 @@
int gve_adminq_alloc(struct device *dev, struct gve_priv *priv);
void gve_adminq_free(struct device *dev, struct gve_priv *priv);
void gve_adminq_release(struct gve_priv *priv);
-int gve_adminq_execute_cmd(struct gve_priv *priv,
- union gve_adminq_command *cmd_orig);
int gve_adminq_describe_device(struct gve_priv *priv);
int gve_adminq_configure_device_resources(struct gve_priv *priv,
dma_addr_t counter_array_bus_addr,
@@ -210,12 +264,15 @@
dma_addr_t db_array_bus_addr,
u32 num_ntfy_blks);
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
-int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id);
-int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id);
-int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id);
-int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+ dma_addr_t stats_report_addr, u64 interval);
+int gve_adminq_report_link_speed(struct gve_priv *priv);
#endif /* _GVE_ADMINQ_H */
diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
index 87985d4..d4553fb 100644
--- a/drivers/net/ethernet/google/gve/gve_desc.h
+++ b/drivers/net/ethernet/google/gve/gve_desc.h
@@ -20,9 +20,11 @@
* Base addresses encoded in seg_addr are not assumed to be physical
* addresses. The ring format assumes these come from some linear address
* space. This could be physical memory, kernel virtual memory, user virtual
- * memory. gVNIC uses lists of registered pages. Each queue is assumed
- * to be associated with a single such linear address space to ensure a
- * consistent meaning for seg_addrs posted to its rings.
+ * memory.
+ * If raw dma addressing is not supported then gVNIC uses lists of registered
+ * pages. Each queue is assumed to be associated with a single such linear
+ * address space to ensure a consistent meaning for seg_addrs posted to its
+ * rings.
*/
struct gve_tx_pkt_desc {
@@ -76,12 +78,14 @@
} __packed;
static_assert(sizeof(struct gve_rx_desc) == 64);
-/* As with the Tx ring format, the qpl_offset entries below are offsets into an
- * ordered list of registered pages.
+/* If the device supports raw dma addressing then the addr in data slot is
+ * the dma address of the buffer.
+ * If the device only supports registered segments than the addr is a byte
+ * offset into the registered segment (an ordered list of pages) where the
+ * buffer is.
*/
struct gve_rx_data_slot {
- /* byte offset into the rx registered segment of this slot */
- __be64 qpl_offset;
+ __be64 addr;
};
/* GVE Recive Packet Descriptor Seq No */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 0764714..0ad957f 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -7,6 +7,7 @@
#include "gve_linux_version.h"
#include <linux/rtnetlink.h>
#include "gve.h"
+#include "gve_adminq.h"
static void gve_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
@@ -33,43 +34,86 @@
}
static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
- "rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
- "rx_dropped", "tx_dropped", "tx_timeouts",
+ "rx_packets", "rx_total_bytes", "rx_total_dropped_pkt",
+ "rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt",
+ "tx_packets", "tx_total_bytes", "tx_total_dropped_pkt", "tx_timeouts",
+ "interface_up_cnt", "interface_down_cnt", "reset_cnt",
+ "page_alloc_fail", "dma_mapping_error",
+};
+
+static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
+ "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]",
+ "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
+ "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
+ "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
+};
+
+static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
+ "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]",
+ "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
+};
+
+static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
+ "adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts",
+ "adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt",
+ "adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt",
+ "adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt",
+ "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt",
+ "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
+ "adminq_report_stats_cnt",
+};
+
+static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
+ "report-stats",
};
#define GVE_MAIN_STATS_LEN ARRAY_SIZE(gve_gstrings_main_stats)
-#define NUM_GVE_TX_CNTS 5
-#define NUM_GVE_RX_CNTS 2
+#define GVE_ADMINQ_STATS_LEN ARRAY_SIZE(gve_gstrings_adminq_stats)
+#define NUM_GVE_TX_CNTS ARRAY_SIZE(gve_gstrings_tx_stats)
+#define NUM_GVE_RX_CNTS ARRAY_SIZE(gve_gstrings_rx_stats)
+#define GVE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(gve_gstrings_priv_flags)
static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct gve_priv *priv = netdev_priv(netdev);
char *s = (char *)data;
- int i;
+ int i, j;
- if (stringset != ETH_SS_STATS)
- return;
+ switch (stringset) {
+ case ETH_SS_STATS:
+ memcpy(s, *gve_gstrings_main_stats,
+ sizeof(gve_gstrings_main_stats));
+ s += sizeof(gve_gstrings_main_stats);
- memcpy(s, *gve_gstrings_main_stats,
- sizeof(gve_gstrings_main_stats));
- s += sizeof(gve_gstrings_main_stats);
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i);
- s += ETH_GSTRING_LEN;
- snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i);
- s += ETH_GSTRING_LEN;
- }
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i);
- s += ETH_GSTRING_LEN;
- snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i);
- s += ETH_GSTRING_LEN;
- snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i);
- s += ETH_GSTRING_LEN;
- snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i);
- s += ETH_GSTRING_LEN;
- snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i);
- s += ETH_GSTRING_LEN;
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ for (j = 0; j < NUM_GVE_RX_CNTS; j++) {
+ snprintf(s, ETH_GSTRING_LEN,
+ gve_gstrings_rx_stats[j], i);
+ s += ETH_GSTRING_LEN;
+ }
+ }
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
+ snprintf(s, ETH_GSTRING_LEN,
+ gve_gstrings_tx_stats[j], i);
+ s += ETH_GSTRING_LEN;
+ }
+ }
+
+ memcpy(s, *gve_gstrings_adminq_stats,
+ sizeof(gve_gstrings_adminq_stats));
+ s += sizeof(gve_gstrings_adminq_stats);
+ break;
+
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(s, *gve_gstrings_priv_flags,
+ sizeof(gve_gstrings_priv_flags));
+ s += sizeof(gve_gstrings_priv_flags);
+ break;
+
+ default:
+ break;
}
}
@@ -79,9 +123,11 @@
switch (sset) {
case ETH_SS_STATS:
- return GVE_MAIN_STATS_LEN +
+ return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN +
(priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
(priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+ case ETH_SS_PRIV_FLAGS:
+ return GVE_PRIV_FLAGS_STR_LEN;
default:
return -EOPNOTSUPP;
}
@@ -91,27 +137,62 @@
gve_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats, u64 *data)
{
+ u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,
+ tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt,
+ tmp_tx_pkts, tmp_tx_bytes;
+ u64 rx_pkts, rx_bytes, rx_skb_alloc_fail, rx_buf_alloc_fail,
+ rx_desc_err_dropped_pkt, tx_pkts, tx_bytes;
struct gve_priv *priv = netdev_priv(netdev);
- u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes;
- unsigned int start;
+ int *rx_qid_to_stats_idx;
+ int *tx_qid_to_stats_idx;
+ struct stats *report_stats = priv->stats_report->stats;
+ int stats_idx, base_stats_idx, max_stats_idx;
+ bool skip_nic_stats;
+
+ unsigned int start;
int ring;
- int i;
+ int i, j;
ASSERT_RTNL();
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0))
memset(data, 0, stats->n_stats * sizeof(*data));
#endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0)) */
- for (rx_pkts = 0, rx_bytes = 0, ring = 0;
- ring < priv->rx_cfg.num_queues; ring++) {
+ rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues,
+ sizeof(int), GFP_KERNEL);
+ if (!rx_qid_to_stats_idx) {
+ return;
+ }
+ tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues,
+ sizeof(int), GFP_KERNEL);
+ if (!tx_qid_to_stats_idx) {
+ kfree(rx_qid_to_stats_idx);
+ return;
+ }
+
+ for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0,
+ rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0;
+ ring < priv->rx_cfg.num_queues; ring++) {
if (priv->rx) {
do {
+ struct gve_rx_ring *rx = &priv->rx[ring];
start =
u64_stats_fetch_begin(&priv->rx[ring].statss);
- rx_pkts += priv->rx[ring].rpackets;
- rx_bytes += priv->rx[ring].rbytes;
+ tmp_rx_pkts = rx->rpackets;
+ tmp_rx_bytes = rx->rbytes;
+ tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
+ tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
+ tmp_rx_desc_err_dropped_pkt =
+ rx->rx_desc_err_dropped_pkt;
+
} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
start));
+ rx_pkts += tmp_rx_pkts;
+ rx_bytes += tmp_rx_bytes;
+ rx_skb_alloc_fail += tmp_rx_skb_alloc_fail;
+ rx_buf_alloc_fail += tmp_rx_buf_alloc_fail;
+ rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt;
+
}
}
for (tx_pkts = 0, tx_bytes = 0, ring = 0;
@@ -120,34 +201,110 @@
do {
start =
u64_stats_fetch_begin(&priv->tx[ring].statss);
- tx_pkts += priv->tx[ring].pkt_done;
- tx_bytes += priv->tx[ring].bytes_done;
+ tmp_tx_pkts = priv->tx[ring].pkt_done;
+ tmp_tx_bytes = priv->tx[ring].bytes_done;
} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
start));
+ tx_pkts += tmp_tx_pkts;
+ tx_bytes += tmp_tx_bytes;
}
}
i = 0;
data[i++] = rx_pkts;
- data[i++] = tx_pkts;
data[i++] = rx_bytes;
+ /* total rx dropped packets */
+ data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail +
+ rx_desc_err_dropped_pkt;
+ data[i++] = rx_skb_alloc_fail;
+ data[i++] = rx_buf_alloc_fail;
+ data[i++] = rx_desc_err_dropped_pkt;
+ data[i++] = tx_pkts;
data[i++] = tx_bytes;
- /* Skip rx_dropped and tx_dropped */
- i += 2;
+ /* Skip tx_dropped */
+ i++;
data[i++] = priv->tx_timeo_cnt;
+ data[i++] = priv->interface_up_cnt;
+ data[i++] = priv->interface_down_cnt;
+ data[i++] = priv->reset_cnt;
+ data[i++] = priv->page_alloc_fail;
+ data[i++] = priv->dma_mapping_error;
i = GVE_MAIN_STATS_LEN;
+ /* For rx cross-reporting stats, start from nic rx stats in report */
+ base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+ GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
+ max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
+ base_stats_idx;
+ /* Preprocess the stats report for rx, map queue id to start index */
+ skip_nic_stats = false;
+ for (stats_idx = base_stats_idx; stats_idx < max_stats_idx;
+ stats_idx += NIC_RX_STATS_REPORT_NUM) {
+ u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name);
+ u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id);
+ if (stat_name == 0) {
+ /* no stats written by NIC yet */
+ skip_nic_stats = true;
+ break;
+ }
+ rx_qid_to_stats_idx[queue_id] = stats_idx;
+ }
/* walk RX rings */
if (priv->rx) {
for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
struct gve_rx_ring *rx = &priv->rx[ring];
- data[i++] = rx->cnt;
data[i++] = rx->fill_cnt;
+ data[i++] = rx->cnt;
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->rx[ring].statss);
+ tmp_rx_bytes = rx->rbytes;
+ tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
+ tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
+ tmp_rx_desc_err_dropped_pkt =
+ rx->rx_desc_err_dropped_pkt;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ data[i++] = tmp_rx_bytes;
+ /* rx dropped packets */
+ data[i++] = tmp_rx_skb_alloc_fail +
+ tmp_rx_buf_alloc_fail +
+ tmp_rx_desc_err_dropped_pkt;
+ data[i++] = rx->rx_copybreak_pkt;
+ data[i++] = rx->rx_copied_pkt;
+ /* stats from NIC */
+ if (skip_nic_stats) {
+ /* skip NIC rx stats */
+ i += NIC_RX_STATS_REPORT_NUM;
+ continue;
+ }
+ for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
+ u64 value = be64_to_cpu(report_stats[
+ rx_qid_to_stats_idx[ring] + j].value);
+ data[i++] = value;
+ }
}
} else {
i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
}
+ /* For tx cross-reporting stats, start from nic tx stats in report */
+ base_stats_idx = max_stats_idx;
+ max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+ max_stats_idx;
+ /* Preprocess the stats report for tx, map queue id to start index */
+ skip_nic_stats = false;
+ for (stats_idx = base_stats_idx; stats_idx < max_stats_idx;
+ stats_idx += NIC_TX_STATS_REPORT_NUM) {
+ u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name);
+ u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id);
+ if (stat_name == 0) {
+ /* no stats written by NIC yet */
+ skip_nic_stats = true;
+ break;
+ }
+ tx_qid_to_stats_idx[queue_id] = stats_idx;
+ }
/* walk TX rings */
if (priv->tx) {
for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
@@ -155,14 +312,51 @@
data[i++] = tx->req;
data[i++] = tx->done;
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->tx[ring].statss);
+ tmp_tx_bytes = tx->bytes_done;
+ } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+ start));
+ data[i++] = tmp_tx_bytes;
data[i++] = tx->wake_queue;
data[i++] = tx->stop_queue;
data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
tx));
+ /* stats from NIC */
+ if (skip_nic_stats) {
+ /* skip NIC tx stats */
+ i += NIC_TX_STATS_REPORT_NUM;
+ continue;
+ }
+ for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
+ u64 value = be64_to_cpu(report_stats[
+ tx_qid_to_stats_idx[ring] + j].value);
+ data[i++] = value;
+ }
}
} else {
i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
}
+
+ kfree(rx_qid_to_stats_idx);
+ kfree(tx_qid_to_stats_idx);
+
+ /* AQ Stats */
+ data[i++] = priv->adminq_prod_cnt;
+ data[i++] = priv->adminq_cmd_fail;
+ data[i++] = priv->adminq_timeouts;
+ data[i++] = priv->adminq_describe_device_cnt;
+ data[i++] = priv->adminq_cfg_device_resources_cnt;
+ data[i++] = priv->adminq_register_page_list_cnt;
+ data[i++] = priv->adminq_unregister_page_list_cnt;
+ data[i++] = priv->adminq_create_tx_queue_cnt;
+ data[i++] = priv->adminq_create_rx_queue_cnt;
+ data[i++] = priv->adminq_destroy_tx_queue_cnt;
+ data[i++] = priv->adminq_destroy_rx_queue_cnt;
+ data[i++] = priv->adminq_dcfg_device_resources_cnt;
+ data[i++] = priv->adminq_set_driver_parameter_cnt;
+ data[i++] = priv->adminq_report_stats_cnt;
}
static void gve_get_channels(struct net_device *netdev,
@@ -234,6 +428,99 @@
return -EOPNOTSUPP;
}
+static int gve_get_tunable(struct net_device *netdev,
+ const struct ethtool_tunable *etuna, void *value)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ switch (etuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ *(u32 *)value = priv->rx_copybreak;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int gve_set_tunable(struct net_device *netdev,
+ const struct ethtool_tunable *etuna,
+ const void *value)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ u32 len;
+
+ switch(etuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ len = *(u32 *)value;
+ if (len > priv->dev->mtu) {
+ return -EINVAL;
+ }
+ priv->rx_copybreak = len;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 gve_get_priv_flags(struct net_device *netdev)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ u32 i, ret_flags = 0;
+
+ for (i = 0; i < GVE_PRIV_FLAGS_STR_LEN; i++) {
+ if (priv->ethtool_flags & BIT(i)) {
+ ret_flags |= BIT(i);
+ }
+ }
+ return ret_flags;
+}
+
+static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ u64 ori_flags, new_flags;
+ u32 i;
+
+ ori_flags = READ_ONCE(priv->ethtool_flags);
+ new_flags = ori_flags;
+
+ for (i = 0; i < GVE_PRIV_FLAGS_STR_LEN; i++) {
+ if (flags & BIT(i))
+ new_flags |= BIT(i);
+ else
+ new_flags &= ~(BIT(i));
+ priv->ethtool_flags = new_flags;
+ /* set report-stats */
+ if (strcmp(gve_gstrings_priv_flags[i], "report-stats") == 0) {
+ /* update the stats when user turns report-stats on */
+ if (flags & BIT(i))
+ gve_handle_report_stats(priv);
+ /* zero off gve stats when report-stats turned off */
+ if (!(flags & BIT(i)) && (ori_flags & BIT(i))) {
+ int tx_stats_num = GVE_TX_STATS_REPORT_NUM *
+ priv->tx_cfg.num_queues;
+ int rx_stats_num = GVE_RX_STATS_REPORT_NUM *
+ priv->rx_cfg.num_queues;
+ memset(priv->stats_report->stats, 0,
+ (tx_stats_num + rx_stats_num) *
+ sizeof(struct stats));
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gve_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err = gve_adminq_report_link_speed(priv);
+
+ cmd->base.speed = priv->link_speed;
+ return err;
+}
+
const struct ethtool_ops gve_ethtool_ops = {
.get_drvinfo = gve_get_drvinfo,
.get_strings = gve_get_strings,
@@ -246,4 +533,9 @@
.get_link = ethtool_op_get_link,
.get_ringparam = gve_get_ringparam,
.reset = gve_user_reset,
+ .get_tunable = gve_get_tunable,
+ .set_tunable = gve_set_tunable,
+ .get_priv_flags = gve_get_priv_flags,
+ .set_priv_flags = gve_set_priv_flags,
+ .get_link_ksettings = gve_get_link_ksettings
};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index d1fe9df..baad1b6 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -21,7 +21,7 @@
#define GVE_DEFAULT_RX_COPYBREAK (256)
#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
-#define GVE_VERSION "1.0.1"
+#define GVE_VERSION "1.1.0"
#define GVE_VERSION_PREFIX "GVE-"
const char gve_version_str[] = GVE_VERSION;
@@ -86,6 +86,60 @@
priv->counter_array = NULL;
}
+void gve_service_task_schedule(struct gve_priv *priv)
+{
+ if (!gve_get_probe_in_progress(priv) &&
+ !gve_get_reset_in_progress(priv)) {
+ gve_set_do_report_stats(priv);
+ queue_work(priv->gve_wq, &priv->service_task);
+ }
+}
+
+static void gve_service_timer(struct timer_list *t)
+{
+ struct gve_priv *priv = from_timer(priv, t, service_timer);
+
+ mod_timer(&priv->service_timer,
+ round_jiffies(jiffies +
+ msecs_to_jiffies(priv->service_timer_period)));
+ gve_service_task_schedule(priv);
+}
+
+static int gve_alloc_stats_report(struct gve_priv *priv)
+{
+ int tx_stats_num, rx_stats_num;
+
+ tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
+ priv->tx_cfg.num_queues;
+ rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
+ priv->rx_cfg.num_queues;
+ priv->stats_report_len = sizeof(struct gve_stats_report) +
+ (tx_stats_num + rx_stats_num) *
+ sizeof(struct stats);
+ priv->stats_report =
+ dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
+ &priv->stats_report_bus, GFP_KERNEL);
+ if (!priv->stats_report)
+ return -ENOMEM;
+ /* Set up timer for periodic task */
+ timer_setup(&priv->service_timer, gve_service_timer, 0);
+ priv->service_timer_period = GVE_SERVICE_TIMER_PERIOD;
+ /* Start the service task timer */
+ mod_timer(&priv->service_timer,
+ round_jiffies(jiffies +
+ msecs_to_jiffies(priv->service_timer_period)));
+ return 0;
+}
+
+static void gve_free_stats_report(struct gve_priv *priv)
+{
+
+ del_timer_sync(&priv->service_timer);
+ dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
+ priv->stats_report, priv->stats_report_bus);
+ priv->stats_report = NULL;
+}
+
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
struct gve_priv *priv = arg;
@@ -241,15 +295,24 @@
dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
goto abort_with_msix_enabled;
}
- priv->ntfy_blocks =
+
+ priv->irq_db_indices =
dma_alloc_coherent(&priv->pdev->dev,
priv->num_ntfy_blks *
- sizeof(*priv->ntfy_blocks),
- &priv->ntfy_block_bus, GFP_KERNEL);
- if (!priv->ntfy_blocks) {
+ sizeof(*priv->irq_db_indices),
+ &priv->irq_db_indices_bus, GFP_KERNEL);
+ if (!priv->irq_db_indices) {
err = -ENOMEM;
goto abort_with_mgmt_vector;
}
+
+ priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
+ sizeof(*priv->ntfy_blocks), GFP_KERNEL);
+ if (!priv->ntfy_blocks) {
+ err = -ENOMEM;
+ goto abort_with_irq_db_indices;
+ }
+
/* Setup the other blocks - the first n-1 vectors */
for (i = 0; i < priv->num_ntfy_blks; i++) {
struct gve_notify_block *block = &priv->ntfy_blocks[i];
@@ -267,6 +330,7 @@
}
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
get_cpu_mask(i % active_cpus));
+ block->irq_db_index = &priv->irq_db_indices[i].index;
}
return 0;
abort_with_some_ntfy_blocks:
@@ -278,10 +342,13 @@
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
}
- dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
- sizeof(*priv->ntfy_blocks),
- priv->ntfy_blocks, priv->ntfy_block_bus);
+ kvfree(priv->ntfy_blocks);
priv->ntfy_blocks = NULL;
+abort_with_irq_db_indices:
+ dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+ sizeof(*priv->irq_db_indices),
+ priv->irq_db_indices, priv->irq_db_indices_bus);
+ priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
@@ -305,10 +372,12 @@
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
}
- dma_free_coherent(&priv->pdev->dev,
- priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
- priv->ntfy_blocks, priv->ntfy_block_bus);
+ kvfree(priv->ntfy_blocks);
priv->ntfy_blocks = NULL;
+ dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+ sizeof(*priv->irq_db_indices),
+ priv->irq_db_indices, priv->irq_db_indices_bus);
+ priv->irq_db_indices = NULL;
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
pci_disable_msix(priv->pdev);
kfree(priv->msix_vectors);
@@ -325,19 +394,30 @@
err = gve_alloc_notify_blocks(priv);
if (err)
goto abort_with_counter;
+ err = gve_alloc_stats_report(priv);
+ if (err)
+ goto abort_with_ntfy_blocks;
err = gve_adminq_configure_device_resources(priv,
priv->counter_array_bus,
priv->num_event_counters,
- priv->ntfy_block_bus,
+ priv->irq_db_indices_bus,
priv->num_ntfy_blks);
if (unlikely(err)) {
dev_err(&priv->pdev->dev,
"could not setup device_resources: err=%d\n", err);
err = -ENXIO;
- goto abort_with_ntfy_blocks;
+ goto abort_with_stats_report;
}
+ err = gve_adminq_report_stats(priv, priv->stats_report_len,
+ priv->stats_report_bus,
+ GVE_SERVICE_TIMER_PERIOD);
+ if (err)
+ dev_err(&priv->pdev->dev,
+ "Failed to report stats: err=%d\n", err);
gve_set_device_resources_ok(priv);
return 0;
+abort_with_stats_report:
+ gve_free_stats_report(priv);
abort_with_ntfy_blocks:
gve_free_notify_blocks(priv);
abort_with_counter:
@@ -353,6 +433,14 @@
/* Tell device its resources are being freed */
if (gve_get_device_resources_ok(priv)) {
+ /* detach the stats report */
+ err = gve_adminq_report_stats(priv, 0, 0x0,
+ GVE_SERVICE_TIMER_PERIOD);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to detach stats report: err=%d\n", err);
+ gve_trigger_reset(priv);
+ }
err = gve_adminq_deconfigure_device_resources(priv);
if (err) {
dev_err(&priv->pdev->dev,
@@ -363,6 +451,7 @@
}
gve_free_counter_array(priv);
gve_free_notify_blocks(priv);
+ gve_free_stats_report(priv);
gve_clear_device_resources_ok(priv);
}
@@ -433,35 +522,37 @@
int err;
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- err = gve_adminq_create_tx_queue(priv, i);
- if (err) {
- netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
- i);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
- }
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- err = gve_adminq_create_rx_queue(priv, i);
- if (err) {
- netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
- i);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- /* Rx data ring has been prefilled with packet buffers at
- * queue allocation time.
- * Write the doorbell to provide descriptor slots and packet
- * buffers to the NIC.
+ err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
+ priv->tx_cfg.num_queues);
+ /* This failure will trigger a reset - no need to clean
+ * up
*/
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "created %d tx queues \n",
+ priv->tx_cfg.num_queues);
+
+ err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
+ priv->rx_cfg.num_queues);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "created %d rx queues \n",
+ priv->rx_cfg.num_queues);
+
+ /* Rx data ring has been prefilled with packet buffers at queue
+ * allocation time.
+ * Write the doorbell to provide descriptor slots and packet buffers
+ * to the NIC.
+ */
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
gve_rx_write_doorbell(priv, &priv->rx[i]);
- netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
}
return 0;
@@ -530,34 +621,23 @@
static int gve_destroy_rings(struct gve_priv *priv)
{
int err;
- int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- err = gve_adminq_destroy_tx_queue(priv, i);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to destroy tx queue %d\n",
- i);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
+ err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy tx queues\n");
+ /* This failure will trigger a reset - no need to clean up */
+ return err;
}
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- err = gve_adminq_destroy_rx_queue(priv, i);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to destroy rx queue %d\n",
- i);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
+ netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
+ err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy rx queues\n");
+ /* This failure will trigger a reset - no need to clean up */
+ return err;
}
+ netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
return 0;
}
@@ -586,15 +666,25 @@
}
}
-int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
- enum dma_data_direction dir)
+int gve_alloc_page(struct gve_priv* priv, struct device* dev,
+ struct page **page, dma_addr_t *dma,
+ enum dma_data_direction dir, gfp_t gfp_flags)
{
- *page = alloc_page(GFP_KERNEL);
- if (!page)
+ if (priv->dma_mask == 24)
+ gfp_flags |= GFP_DMA;
+ else if (priv->dma_mask == 32)
+ gfp_flags |= GFP_DMA32;
+
+ *page = alloc_page(gfp_flags);
+ if (!*page) {
+ priv->page_alloc_fail++;
return -ENOMEM;
+ }
*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
if (dma_mapping_error(dev, *dma)) {
+ priv->dma_mapping_error++;
put_page(*page);
+ *page = NULL;
return -ENOMEM;
}
return 0;
@@ -636,9 +726,9 @@
return -ENOMEM;
for (i = 0; i < pages; i++) {
- err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
+ err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
&qpl->page_buses[i],
- gve_qpl_dma_dir(priv, id));
+ gve_qpl_dma_dir(priv, id), GFP_KERNEL);
/* caller handles clean up */
if (err)
return -ENOMEM;
@@ -685,6 +775,10 @@
int i, j;
int err;
+ /* Raw addressing means no QPLs */
+ if (priv->raw_addressing)
+ return 0;
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) */
@@ -701,7 +795,7 @@
}
for (; i < num_qpls; i++) {
err = gve_alloc_queue_page_list(priv, i,
- priv->rx_pages_per_qpl);
+ priv->rx_data_slot_cnt);
if (err)
goto free_qpls;
}
@@ -732,6 +826,10 @@
int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
int i;
+ /* Raw addressing means no QPLs */
+ if (priv->raw_addressing)
+ return;
+
kfree(priv->qpl_cfg.qpl_id_map);
for (i = 0; i < num_qpls; i++)
@@ -783,7 +881,8 @@
gve_set_device_rings_ok(priv);
gve_turnup(priv);
- netif_carrier_on(dev);
+ queue_work(priv->gve_wq, &priv->service_task);
+ priv->interface_up_cnt++;
return 0;
free_rings:
@@ -825,6 +924,7 @@
gve_free_rings(priv);
gve_free_qpls(priv);
+ priv->interface_down_cnt++;
return 0;
err:
@@ -904,6 +1004,7 @@
netif_tx_disable(priv->dev);
gve_clear_napi_enabled(priv);
+ gve_clear_report_stats(priv);
}
static void gve_turnup(struct gve_priv *priv)
@@ -979,6 +1080,10 @@
dev_info(&priv->pdev->dev, "Device requested reset.\n");
gve_set_do_reset(priv);
}
+ if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
+ dev_info(&priv->pdev->dev, "Device report stats on.\n");
+ gve_set_do_report_stats(priv);
+ }
}
static void gve_handle_reset(struct gve_priv *priv)
@@ -997,16 +1102,100 @@
}
}
-/* Handle NIC status register changes and reset requests */
+void gve_handle_report_stats(struct gve_priv *priv)
+{
+ int idx, stats_idx = 0, tx_bytes;
+ unsigned int start = 0;
+ struct stats *stats = priv->stats_report->stats;
+
+ if (!gve_get_report_stats(priv))
+ return;
+
+ be64_add_cpu(&priv->stats_report->written_count, 1);
+ /* tx stats */
+ if (priv->tx) {
+ for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ do {
+ start = u64_stats_fetch_begin(&priv->tx[idx].statss);
+ tx_bytes = priv->tx[idx].bytes_done;
+ } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(TX_WAKE_CNT),
+ .value = cpu_to_be64(priv->tx[idx].wake_queue),
+ .queue_id = cpu_to_be32(idx),
+ };
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(TX_STOP_CNT),
+ .value = cpu_to_be64(priv->tx[idx].stop_queue),
+ .queue_id = cpu_to_be32(idx),
+ };
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(TX_FRAMES_SENT),
+ .value = cpu_to_be64(priv->tx[idx].req),
+ .queue_id = cpu_to_be32(idx),
+ };
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(TX_BYTES_SENT),
+ .value = cpu_to_be64(tx_bytes),
+ .queue_id = cpu_to_be32(idx),
+ };
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(
+ TX_LAST_COMPLETION_PROCESSED),
+ .value = cpu_to_be64(priv->tx[idx].done),
+ .queue_id = cpu_to_be32(idx),
+ };
+ }
+ }
+ /* rx stats */
+ if (priv->rx) {
+ for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(
+ RX_NEXT_EXPECTED_SEQUENCE),
+ .value = cpu_to_be64(priv->rx[idx].desc.seqno),
+ .queue_id = cpu_to_be32(idx),
+ };
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
+ .value = cpu_to_be64(priv->rx[0].fill_cnt),
+ .queue_id = cpu_to_be32(idx),
+ };
+ }
+ }
+}
+
+void gve_handle_link_status(struct gve_priv *priv, bool link_status)
+{
+ if (!gve_get_napi_enabled(priv))
+ return;
+
+ if (link_status == netif_carrier_ok(priv->dev))
+ return;
+
+ if (link_status) {
+ netif_carrier_on(priv->dev);
+ } else {
+ dev_info(&priv->pdev->dev, "Device link is down.\n");
+ netif_carrier_off(priv->dev);
+ }
+}
+
+/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
struct gve_priv *priv = container_of(work, struct gve_priv,
service_task);
+ u32 status = ioread32be(&priv->reg_bar0->device_status);
- gve_handle_status(priv,
- ioread32be(&priv->reg_bar0->device_status));
+ gve_handle_status(priv, status);
gve_handle_reset(priv);
+ gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+ if (gve_get_do_report_stats(priv)) {
+ gve_handle_report_stats(priv);
+ gve_clear_do_report_stats(priv);
+ }
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
@@ -1025,6 +1214,7 @@
if (skip_describe_device)
goto setup_device;
+ priv->raw_addressing = false;
/* Get the initial information we need from the device */
err = gve_adminq_describe_device(priv);
if (err) {
@@ -1045,7 +1235,7 @@
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
err = gve_adminq_set_mtu(priv, priv->dev->mtu);
if (err) {
- netif_err(priv, drv, priv->dev, "Could not set mtu");
+ dev_err(&priv->pdev->dev, "Could not set mtu");
goto err;
}
}
@@ -1089,10 +1279,10 @@
priv->rx_cfg.num_queues);
}
- netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
- priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
- netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
- priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
+ dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
+ priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
+ dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
+ priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
setup_device:
err = gve_setup_device_resources(priv);
@@ -1172,6 +1362,9 @@
/* Set it all back up */
err = gve_reset_recovery(priv, was_up);
gve_clear_reset_in_progress(priv);
+ priv->reset_cnt++;
+ priv->interface_up_cnt = 0;
+ priv->interface_down_cnt = 0;
return err;
}
@@ -1199,6 +1392,7 @@
__be32 __iomem *db_bar;
struct gve_registers __iomem *reg_bar;
struct gve_priv *priv;
+ u8 dma_mask;
int err;
err = pci_enable_device(pdev);
@@ -1211,19 +1405,6 @@
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
- goto abort_with_pci_region;
- }
-
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_err(&pdev->dev,
- "Failed to set consistent dma mask: err=%d\n", err);
- goto abort_with_pci_region;
- }
-
reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
if (!reg_bar) {
dev_err(&pdev->dev, "Failed to map pci bar!\n");
@@ -1238,10 +1419,28 @@
goto abort_with_reg_bar;
}
+ dma_mask = readb(®_bar->dma_mask);
+ // Default to 64 if the register isn't set
+ if (!dma_mask)
+ dma_mask = 64;
gve_write_version(®_bar->driver_version);
/* Get max queues to alloc etherdev */
- max_rx_queues = ioread32be(®_bar->max_tx_queues);
- max_tx_queues = ioread32be(®_bar->max_rx_queues);
+ max_tx_queues = ioread32be(®_bar->max_tx_queues);
+ max_rx_queues = ioread32be(®_bar->max_rx_queues);
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
+ goto abort_with_reg_bar;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to set consistent dma mask: err=%d\n", err);
+ goto abort_with_reg_bar;
+ }
+
/* Alloc and setup the netdev and priv */
dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
if (!dev) {
@@ -1278,18 +1477,16 @@
priv->db_bar2 = db_bar;
priv->service_task_flags = 0x0;
priv->state_flags = 0x0;
+ priv->ethtool_flags = 0x0;
+ priv->dma_mask = dma_mask;
gve_set_probe_in_progress(priv);
- err = register_netdev(dev);
- if (err)
- goto abort_with_netdev;
-
priv->gve_wq = alloc_ordered_workqueue("gve", 0);
if (!priv->gve_wq) {
dev_err(&pdev->dev, "Could not allocate workqueue");
err = -ENOMEM;
- goto abort_while_registered;
+ goto abort_with_netdev;
}
INIT_WORK(&priv->service_task, gve_service_task);
priv->tx_cfg.max_queues = max_tx_queues;
@@ -1299,6 +1496,10 @@
if (err)
goto abort_with_wq;
+ err = register_netdev(dev);
+ if (err)
+ goto abort_with_wq;
+
dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
gve_clear_probe_in_progress(priv);
queue_work(priv->gve_wq, &priv->service_task);
@@ -1308,9 +1509,6 @@
abort_with_wq:
destroy_workqueue(priv->gve_wq);
-abort_while_registered:
- unregister_netdev(dev);
-
abort_with_netdev:
free_netdev(dev);
diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h
index 84ab889..776c291 100644
--- a/drivers/net/ethernet/google/gve/gve_register.h
+++ b/drivers/net/ethernet/google/gve/gve_register.h
@@ -16,12 +16,14 @@
__be32 adminq_pfn;
__be32 adminq_doorbell;
__be32 adminq_event_counter;
- u8 reserved[3];
+ u8 reserved[2];
+ u8 dma_mask;
u8 driver_version;
};
enum gve_device_status_flags {
GVE_DEVICE_STATUS_RESET_MASK = BIT(1),
GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2),
+ GVE_DEVICE_STATUS_REPORT_STATS_MASK = BIT(3),
};
#endif /* _GVE_REGISTER_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 382e867..302f443 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -17,12 +17,22 @@
block->rx = NULL;
}
+static void gve_rx_free_buffer(struct device *dev,
+ struct gve_rx_slot_page_info *page_info,
+ struct gve_rx_data_slot *data_slot) {
+ dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) -
+ page_info->page_offset);
+
+ page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
+ gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
+}
+
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
struct gve_rx_ring *rx = &priv->rx[idx];
struct device *dev = &priv->pdev->dev;
size_t bytes;
- u32 slots;
+ u32 slots = rx->mask + 1;
gve_rx_remove_from_block(priv, idx);
@@ -34,11 +44,18 @@
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
- gve_unassign_qpl(priv, rx->data.qpl->id);
- rx->data.qpl = NULL;
+ if (rx->data.raw_addressing) {
+ int i;
+
+ for (i = 0; i < slots; i++)
+ gve_rx_free_buffer(dev, &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ } else {
+ gve_unassign_qpl(priv, rx->data.qpl->id);
+ rx->data.qpl = NULL;
+ }
kfree(rx->data.page_info);
- slots = rx->mask + 1;
bytes = sizeof(*rx->data.data_ring) * slots;
dma_free_coherent(dev, bytes, rx->data.data_ring,
rx->data.data_bus);
@@ -53,13 +70,17 @@
page_info->page = page;
page_info->page_offset = 0;
page_info->page_address = page_address(page);
- slot->qpl_offset = cpu_to_be64(addr);
+ slot->addr = cpu_to_be64(addr);
+ /* The page already has 1 ref */
+ page_ref_add(page, INT_MAX - 1);
+ page_info->pagecnt_bias = INT_MAX;
}
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
struct gve_priv *priv = rx->gve;
u32 slots;
+ int err;
int i;
/* Allocate one page per Rx queue slot. Each page is split into two
@@ -77,12 +98,31 @@
if (!rx->data.page_info)
return -ENOMEM;
- rx->data.qpl = gve_assign_rx_qpl(priv);
-
+ if (!rx->data.raw_addressing)
+ rx->data.qpl = gve_assign_rx_qpl(priv);
for (i = 0; i < slots; i++) {
- struct page *page = rx->data.qpl->pages[i];
- dma_addr_t addr = i * PAGE_SIZE;
+ struct page *page;
+ dma_addr_t addr;
+ if (rx->data.raw_addressing) {
+ err = gve_alloc_page(priv, &priv->pdev->dev, &page,
+ &addr, DMA_FROM_DEVICE,
+ GFP_KERNEL);
+ if (err) {
+ int j;
+
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
+ for (j = 0; j < i; j++)
+ gve_free_page(&priv->pdev->dev, page,
+ addr, DMA_FROM_DEVICE);
+ return err;
+ }
+ } else {
+ page = rx->data.qpl->pages[i];
+ addr = i * PAGE_SIZE;
+ }
gve_setup_rx_buffer(&rx->data.page_info[i],
&rx->data.data_ring[i], addr, page);
}
@@ -116,8 +156,9 @@
rx->gve = priv;
rx->q_num = idx;
- slots = priv->rx_pages_per_qpl;
+ slots = priv->rx_data_slot_cnt;
rx->mask = slots - 1;
+ rx->data.raw_addressing = priv->raw_addressing;
/* alloc rx data ring */
bytes = sizeof(*rx->data.data_ring) * slots;
@@ -166,8 +207,8 @@
err = -ENOMEM;
goto abort_with_q_resources;
}
- rx->mask = slots - 1;
rx->cnt = 0;
+ rx->db_threshold = priv->rx_desc_cnt / 2;
rx->desc.seqno = 1;
gve_rx_add_to_block(priv, idx);
@@ -254,11 +295,11 @@
skb_copy_to_linear_data(skb, va, len);
skb->protocol = eth_type_trans(skb, dev);
+
return skb;
}
-static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
- struct napi_struct *napi,
+static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info,
u16 len)
{
@@ -274,14 +315,135 @@
return skb;
}
-static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
- struct gve_rx_data_slot *data_ring)
+static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
+ struct gve_rx_slot_page_info *page_info,
+ struct gve_rx_data_slot *data_slot,
+ struct gve_rx_ring *rx)
{
- u64 addr = be64_to_cpu(data_ring->qpl_offset);
+ struct page *page;
+ dma_addr_t dma;
+ int err;
+ err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
+ GFP_ATOMIC);
+ if (err) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
+ return err;
+ }
+
+ gve_setup_rx_buffer(page_info, data_slot, dma, page);
+ return 0;
+}
+
+static void gve_rx_flip_buffer(struct gve_rx_slot_page_info *page_info,
+ struct gve_rx_data_slot *data_slot)
+{
+ u64 addr = be64_to_cpu(data_slot->addr);
+
+ /* "flip" to other packet buffer on this page */
page_info->page_offset ^= PAGE_SIZE / 2;
addr ^= PAGE_SIZE / 2;
- data_ring->qpl_offset = cpu_to_be64(addr);
+ data_slot->addr = cpu_to_be64(addr);
+}
+
+static bool gve_rx_can_flip_buffers(struct net_device *netdev) {
+#if PAGE_SIZE == 4096
+ /* We can't flip a buffer if we can't fit a packet
+ * into half a page.
+ */
+ if (netdev->max_mtu + GVE_RX_PAD + ETH_HLEN > PAGE_SIZE / 2)
+ return false;
+ return true;
+#else
+ /* PAGE_SIZE != 4096 - don't try to reuse */
+ return false;
+#endif
+}
+
+static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
+{
+ int pagecount = page_count(page_info->page);
+
+ /* This page is not being used by any SKBs - reuse */
+ if (pagecount == page_info->pagecnt_bias) {
+ return 1;
+ /* This page is still being used by an SKB - we can't reuse */
+ } else if (pagecount > page_info->pagecnt_bias) {
+ return 0;
+ } else {
+ WARN(pagecount < page_info->pagecnt_bias,
+ "Pagecount should never be less than the bias.");
+ return -1;
+ }
+}
+
+static void gve_rx_update_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
+{
+ page_info->pagecnt_bias--;
+ if (page_info->pagecnt_bias == 0) {
+ int pagecount = page_count(page_info->page);
+
+ /* If we have run out of bias - set it back up to INT_MAX
+ * minus the existing refs.
+ */
+ page_info->pagecnt_bias = INT_MAX - (pagecount);
+ /* Set pagecount back up to max */
+ page_ref_add(page_info->page, INT_MAX - pagecount);
+ }
+}
+
+static struct sk_buff *
+gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
+ struct gve_rx_slot_page_info *page_info, u16 len,
+ struct napi_struct *napi,
+ struct gve_rx_data_slot *data_slot, bool can_flip)
+{
+ struct sk_buff *skb = gve_rx_add_frags(napi, page_info, len);
+
+ if (!skb)
+ return NULL;
+
+ /* Optimistically stop the kernel from freeing the page.
+ * We will check again in refill to determine if we need to alloc a
+ * new page.
+ */
+ gve_rx_update_pagecnt_bias(page_info);
+ page_info->can_flip = can_flip;
+
+ return skb;
+}
+
+static struct sk_buff *
+gve_rx_qpl(struct device *dev, struct net_device *netdev,
+ struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
+ u16 len, struct napi_struct *napi,
+ struct gve_rx_data_slot *data_slot, bool recycle)
+{
+ struct sk_buff *skb;
+ /* if raw_addressing mode is not enabled gvnic can only receive into
+ * registered segments. If the buffer can't be recycled, our only
+ * choice is to copy the data out of it so that we can return it to the
+ * device.
+ */
+ if (recycle) {
+ skb = gve_rx_add_frags(napi, page_info, len);
+ /* No point in recycling if we didn't get the skb */
+ if (skb) {
+ /* Make sure the networking stack can't free the page */
+ gve_rx_update_pagecnt_bias(page_info);
+ gve_rx_flip_buffer(page_info, data_slot);
+ }
+ } else {
+ skb = gve_rx_copy(netdev, napi, page_info, len);
+ if (skb) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_copied_pkt++;
+ u64_stats_update_end(&rx->statss);
+ }
+ }
+ return skb;
}
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
@@ -290,67 +452,67 @@
struct gve_rx_slot_page_info *page_info;
struct gve_priv *priv = rx->gve;
struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
- struct net_device *dev = priv->dev;
- struct sk_buff *skb;
- int pagecount;
+ struct net_device *netdev = priv->dev;
+ struct gve_rx_data_slot *data_slot;
+ struct sk_buff *skb = NULL;
+ dma_addr_t page_bus;
u16 len;
/* drop this packet */
- if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
- return true;
+ if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_desc_err_dropped_pkt++;
+ u64_stats_update_end(&rx->statss);
+ return false;
+ }
len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
page_info = &rx->data.page_info[idx];
- dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
+ data_slot = &rx->data.data_ring[idx];
+ page_bus = (rx->data.raw_addressing) ?
+ be64_to_cpu(data_slot->addr) - page_info->page_offset:
+ rx->data.qpl->page_buses[idx];
+ dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
PAGE_SIZE, DMA_FROM_DEVICE);
- /* gvnic can only receive into registered segments. If the buffer
- * can't be recycled, our only choice is to copy the data out of
- * it so that we can return it to the device.
- */
-
-#if PAGE_SIZE == 4096
if (len <= priv->rx_copybreak) {
/* Just copy small packets */
- skb = gve_rx_copy(dev, napi, page_info, len);
- goto have_skb;
- }
- if (unlikely(!gve_can_recycle_pages(dev))) {
- skb = gve_rx_copy(dev, napi, page_info, len);
- goto have_skb;
- }
- pagecount = page_count(page_info->page);
- if (pagecount == 1) {
- /* No part of this page is used by any SKBs; we attach
- * the page fragment to a new SKB and pass it up the
- * stack.
- */
- skb = gve_rx_add_frags(dev, napi, page_info, len);
- if (!skb)
- return true;
- /* Make sure the kernel stack can't release the page */
- get_page(page_info->page);
- /* "flip" to other packet buffer on this page */
- gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
- } else if (pagecount >= 2) {
- /* We have previously passed the other half of this
- * page up the stack, but it has not yet been freed.
- */
- skb = gve_rx_copy(dev, napi, page_info, len);
+ skb = gve_rx_copy(netdev, napi, page_info, len);
+ if (skb) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_copied_pkt++;
+ rx->rx_copybreak_pkt++;
+ u64_stats_update_end(&rx->statss);
+ }
} else {
- WARN(pagecount < 1, "Pagecount should never be < 1");
+ bool can_flip = gve_rx_can_flip_buffers(netdev);
+ int recycle = 0;
+
+ if (can_flip) {
+ recycle = gve_rx_can_recycle_buffer(page_info);
+ if (recycle < 0) {
+ gve_schedule_reset(priv);
+ return false;
+ }
+ }
+ if (rx->data.raw_addressing) {
+ skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
+ page_info, len, napi,
+ data_slot,
+ can_flip && recycle);
+ } else {
+ skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
+ page_info, len, napi, data_slot,
+ can_flip && recycle);
+ }
+ }
+
+ if (!skb) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_skb_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
return false;
}
-#else
- skb = gve_rx_copy(dev, napi, page_info, len);
-#endif
-
-have_skb:
- /* We didn't manage to allocate an skb but we haven't had any
- * reset worthy failures.
- */
- if (!skb)
- return true;
if (likely(feat & NETIF_F_RXCSUM)) {
/* NIC passes up the partial sum */
@@ -389,26 +551,77 @@
next_idx = rx->cnt & rx->mask;
desc = rx->desc.desc_ring + next_idx;
+ /* make sure we have synchronized the seq no with the device */
+ smp_mb();
flags_seq = desc->flags_seq;
- /* Make sure we have synchronized the seq no with the device */
- smp_rmb();
-
return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}
+static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ u32 fill_cnt = rx->fill_cnt;
+
+ while ((fill_cnt & rx->mask) != (rx->cnt & rx->mask)) {
+ u32 idx = fill_cnt & rx->mask;
+ struct gve_rx_slot_page_info *page_info =
+ &rx->data.page_info[idx];
+
+ if (page_info->can_flip) {
+ /* The other half of the page is free because it was
+ * free when we processed the descriptor. Flip to it.
+ */
+ struct gve_rx_data_slot *data_slot =
+ &rx->data.data_ring[idx];
+
+ gve_rx_flip_buffer(page_info, data_slot);
+ } else {
+ /* It is possible that the networking stack has already
+ * finished processing all outstanding packets in the buffer
+ * and it can be reused.
+ * Flipping is unceccessary here - if the networking stack still
+ * owns half the page it is impossible to tell which half. Either
+ * the whole page is free or it needs to be replaced.
+ */
+ int recycle = gve_rx_can_recycle_buffer(page_info);
+
+ if (recycle < 0) {
+ gve_schedule_reset(priv);
+ return false;
+ }
+ if (!recycle) {
+ /* We can't reuse the buffer - alloc a new one*/
+ struct gve_rx_data_slot *data_slot =
+ &rx->data.data_ring[idx];
+ struct device *dev = &priv->pdev->dev;
+
+ gve_rx_free_buffer(dev, page_info, data_slot);
+ page_info->page = NULL;
+ if (gve_rx_alloc_buffer(priv, dev, page_info,
+ data_slot, rx)) {
+ break;
+ }
+ }
+ }
+ fill_cnt++;
+ }
+ rx->fill_cnt = fill_cnt;
+ return true;
+}
+
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
netdev_features_t feat)
{
struct gve_priv *priv = rx->gve;
+ u32 work_done = 0, packets = 0;
struct gve_rx_desc *desc;
u32 cnt = rx->cnt;
u32 idx = cnt & rx->mask;
- u32 work_done = 0;
u64 bytes = 0;
desc = rx->desc.desc_ring + idx;
while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
work_done < budget) {
+ bool dropped;
netif_info(priv, rx_status, priv->dev,
"[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
rx->q_num, idx, desc, desc->flags_seq);
@@ -416,9 +629,11 @@
"[%d] seqno=%d rx->desc.seqno=%d\n",
rx->q_num, GVE_SEQNO(desc->flags_seq),
rx->desc.seqno);
- bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
- if (!gve_rx(rx, desc, feat, idx))
- gve_schedule_reset(priv);
+ dropped = !gve_rx(rx, desc, feat, idx);
+ if (!dropped) {
+ bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
+ packets++;
+ }
cnt++;
idx = cnt & rx->mask;
desc = rx->desc.desc_ring + idx;
@@ -430,20 +645,28 @@
return false;
u64_stats_update_begin(&rx->statss);
- rx->rpackets += work_done;
+ rx->rpackets += packets;
rx->rbytes += bytes;
u64_stats_update_end(&rx->statss);
rx->cnt = cnt;
- rx->fill_cnt += work_done;
- /* restock desc ring slots */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
- dma_wmb();
-#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) */
- wmb();
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) */
- /* Ensure descs are visible before ringing doorbell */
- gve_rx_write_doorbell(priv, rx);
+ /* restock ring slots */
+ if (!rx->data.raw_addressing) {
+ /* In QPL mode buffs are refilled as the desc are processed */
+ rx->fill_cnt += work_done;
+ dma_wmb();/* Ensure descs are visible before ringing doorbell */
+ gve_rx_write_doorbell(priv, rx);
+ } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
+ /* In raw addressing mode buffs are only refilled if the avail
+ * falls below a threshold.
+ */
+ if(!gve_rx_refill_buffers(priv, rx))
+ return false;
+ /* restock desc ring slots */
+ dma_wmb();/* Ensure descs are visible before ringing doorbell */
+ gve_rx_write_doorbell(priv, rx);
+ }
+
return gve_rx_work_pending(rx);
}
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 7f43ef2c..cf66eb4 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -159,9 +159,11 @@
tx->q_resources, tx->q_resources_bus);
tx->q_resources = NULL;
- gve_tx_fifo_release(priv, &tx->tx_fifo);
- gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
- tx->tx_fifo.qpl = NULL;
+ if (!tx->raw_addressing) {
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
+ gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ tx->tx_fifo.qpl = NULL;
+ }
bytes = sizeof(*tx->desc) * slots;
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
@@ -175,12 +177,16 @@
static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
{
+ unsigned int active_cpus = min_t(int, priv->num_ntfy_blks / 2,
+ num_online_cpus());
int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
struct gve_tx_ring *tx = &priv->tx[queue_idx];
block->tx = tx;
tx->ntfy_id = ntfy_idx;
+ netif_set_xps_queue(priv->dev, get_cpu_mask(ntfy_idx % active_cpus),
+ queue_idx);
}
static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
@@ -207,11 +213,15 @@
if (!tx->desc)
goto abort_with_info;
- tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
+ tx->raw_addressing = priv->raw_addressing;
+ tx->dev = &priv->pdev->dev;
+ if (!tx->raw_addressing) {
+ tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
- /* map Tx FIFO */
- if (gve_tx_fifo_init(priv, &tx->tx_fifo))
- goto abort_with_desc;
+ /* map Tx FIFO */
+ if (gve_tx_fifo_init(priv, &tx->tx_fifo))
+ goto abort_with_desc;
+ }
tx->q_resources =
dma_alloc_coherent(hdev,
@@ -229,7 +239,8 @@
return 0;
abort_with_fifo:
- gve_tx_fifo_release(priv, &tx->tx_fifo);
+ if (!tx->raw_addressing)
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_desc:
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
tx->desc = NULL;
@@ -307,22 +318,44 @@
* payload wraps to the beginning of the FIFO.
*/
#define MAX_TX_DESC_NEEDED 3
+static void gve_tx_unmap_buf(struct device *dev,
+ struct gve_tx_dma_buf *buf)
+{
+ const int buf_len = (int)dma_unmap_len(buf, len);
+ if (buf_len > 0) {
+ dma_unmap_single(dev, dma_unmap_addr(buf, dma),
+ dma_unmap_len(buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(buf, len, 0);
+ } else if (buf_len < 0) {
+ dma_unmap_page(dev, dma_unmap_addr(buf, dma),
+ -dma_unmap_len(buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(buf, len, 0);
+ }
+}
/* Check if sufficient resources (descriptor ring space, FIFO space) are
* available to transmit the given number of bytes.
*/
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
- return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
- gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
+ bool can_alloc = true;
+
+ if (!tx->raw_addressing)
+ can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);
+
+ return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
}
/* Stops the queue if the skb cannot be transmitted. */
static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
{
- int bytes_required;
+ int bytes_required = 0;
- bytes_required = gve_skb_fifo_bytes_required(tx, skb);
+ if (!tx->raw_addressing)
+ bytes_required = gve_skb_fifo_bytes_required(tx, skb);
+
if (likely(gve_can_tx(tx, bytes_required)))
return 0;
@@ -391,8 +424,9 @@
seg_desc->seg.seg_addr = cpu_to_be64(addr);
}
-static inline void gve_dma_sync_for_device(struct gve_priv *priv, dma_addr_t *page_buses,
- u64 iov_offset, u64 iov_len)
+static inline void gve_dma_sync_for_device(struct gve_priv *priv,
+ dma_addr_t *page_buses,
+ u64 iov_offset, u64 iov_len)
{
u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
u64 first_page = iov_offset / PAGE_SIZE;
@@ -400,12 +434,14 @@
for (page = first_page; page <= last_page; page++) {
dma_addr_t dma = page_buses[page];
- dma_sync_single_for_device(&priv->pdev->dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
+ dma_sync_single_for_device(&priv->pdev->dev, dma, PAGE_SIZE,
+ DMA_TO_DEVICE);
}
}
-static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb, struct gve_priv *priv)
+static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct sk_buff *skb)
{
int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
union gve_tx_desc *pkt_desc, *seg_desc;
@@ -472,6 +508,96 @@
return 1 + payload_nfrags;
}
+static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int hlen, payload_nfrags, l4_hdr_offset, seg_idx_bias;
+ union gve_tx_desc *pkt_desc, *seg_desc;
+ struct gve_tx_buffer_state *info;
+ bool is_gso = skb_is_gso(skb);
+ u32 idx = tx->req & tx->mask;
+ struct gve_tx_dma_buf *buf;
+ int last_mapped = 0;
+ u64 addr;
+ u32 len;
+ int i;
+
+ info = &tx->info[idx];
+ pkt_desc = &tx->desc[idx];
+
+ l4_hdr_offset = skb_checksum_start_offset(skb);
+ /* If the skb is gso, then we want the tcp header in the first segment
+ * otherwise we want the linear portion of the skb (which will contain
+ * the checksum because skb->csum_start and skb->csum_offset are given
+ * relative to skb->head) in the first segment.
+ */
+ hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
+ skb_headlen(skb);
+ len = skb_headlen(skb);
+
+ info->skb = skb;
+
+ addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx->dev, addr))) {
+ priv->dma_mapping_error++;
+ goto drop;
+ }
+ buf = &info->buf;
+ dma_unmap_len_set(buf, len, len);
+ dma_unmap_addr_set(buf, dma, addr);
+
+ payload_nfrags = shinfo->nr_frags;
+ if (hlen < len) {
+ /* For gso the rest of the linear portion of the skb needs to
+ * be in its own descriptor.
+ */
+ payload_nfrags++;
+ gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ 1 + payload_nfrags, hlen, addr);
+
+ len -= hlen;
+ addr += hlen;
+ seg_desc = &tx->desc[(tx->req + 1) & tx->mask];
+ seg_idx_bias = 2;
+ gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ } else {
+ seg_idx_bias = 1;
+ gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ 1 + payload_nfrags, hlen, addr);
+ }
+
+ for (i = 0; i < payload_nfrags - (seg_idx_bias - 1); i++) {
+ struct skb_frag_struct frag = shinfo->frags[i];
+
+ idx = (tx->req + i + seg_idx_bias) & tx->mask;
+ seg_desc = &tx->desc[idx];
+ len = skb_frag_size(&frag);
+ addr = skb_frag_dma_map(tx->dev, &frag, 0, len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx->dev, addr))) {
+ priv->dma_mapping_error++;
+ goto unmap_drop;
+ }
+ buf = &tx->info[idx].buf;
+ dma_unmap_len_set(buf, len, -len);
+ dma_unmap_addr_set(buf, dma, addr);
+
+ gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ }
+
+ return 1 + payload_nfrags;
+
+unmap_drop:
+ i--;
+ for (last_mapped = i + seg_idx_bias; last_mapped >= 0; last_mapped--) {
+ idx = (tx->req + last_mapped) & tx->mask;
+ gve_tx_unmap_buf(tx->dev, &tx->info[idx].buf);
+ }
+drop:
+ tx->dropped_pkt++;
+ return 0;
+}
+
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
@@ -498,12 +624,20 @@
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
return NETDEV_TX_BUSY;
}
- nsegs = gve_tx_add_skb(tx, skb, priv);
+ if (tx->raw_addressing)
+ nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
+ else
+ nsegs = gve_tx_add_skb_copy(priv, tx, skb);
- netdev_tx_sent_queue(tx->netdev_txq, skb->len);
- skb_tx_timestamp(skb);
+ /* If the packet is getting sent, we need to update the skb */
+ if (nsegs) {
+ netdev_tx_sent_queue(tx->netdev_txq, skb->len);
+ skb_tx_timestamp(skb);
+ }
- /* give packets to NIC */
+ /* Give packets to NIC. Even if this packet failed to send the doorbell
+ * might need to be rung because of xmit_more.
+ */
tx->req += nsegs;
/* If we have xmit_more - don't ring the doorbell unless we are stopped */
@@ -548,6 +682,10 @@
info = &tx->info[idx];
skb = info->skb;
+ /* Unmap the buffer */
+ if (tx->raw_addressing)
+ gve_tx_unmap_buf(tx->dev, &tx->info[idx].buf);
+
/* Mark as free */
if (skb) {
info->skb = NULL;
@@ -558,18 +696,22 @@
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) */
dev_kfree_skb_any(skb);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) */
- /* FIFO free */
- for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
- space_freed += info->iov[i].iov_len +
- info->iov[i].iov_padding;
- info->iov[i].iov_len = 0;
- info->iov[i].iov_padding = 0;
+ if (!tx->raw_addressing) {
+ /* FIFO free */
+ for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
+ space_freed += info->iov[i].iov_len +
+ info->iov[i].iov_padding;
+ info->iov[i].iov_len = 0;
+ info->iov[i].iov_padding = 0;
+ }
}
}
tx->done++;
}
- gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+ if (!tx->raw_addressing) {
+ gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+ }
u64_stats_update_begin(&tx->statss);
tx->bytes_done += bytes;
tx->pkt_done += pkts;