Files
kernel_arpi/drivers/net/wireless/mediatek/mt76/dma.c
Lorenzo Bianconi 7bd0650be6 mt76: dma: fix buffer unmap with non-linear skbs
mt76 dma layer is supposed to unmap skb data buffers while keep txwi
mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
not unmap data fragments in even positions for non-linear skbs. This
issue may result in hw hangs with A-MSDU if the system relies on IOMMU
or SWIOTLB. Fix this behaviour properly unmapping data fragments on
non-linear skbs.

Fixes: 17f1de56df ("mt76: add common code shared between multiple chipsets")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
2019-10-30 16:59:48 +02:00

598 lines
12 KiB
C

// SPDX-License-Identifier: ISC
/*
* Copyright (C) 2016 Felix Fietkau <nbd@nbd.name>
*/
#include <linux/dma-mapping.h>
#include "mt76.h"
#include "dma.h"
static int
mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
int idx, int n_desc, int bufsize,
u32 ring_base)
{
int size;
int i;
spin_lock_init(&q->lock);
q->regs = dev->mmio.regs + ring_base + idx * MT_RING_SIZE;
q->ndesc = n_desc;
q->buf_size = bufsize;
q->hw_idx = idx;
size = q->ndesc * sizeof(struct mt76_desc);
q->desc = dmam_alloc_coherent(dev->dev, size, &q->desc_dma, GFP_KERNEL);
if (!q->desc)
return -ENOMEM;
size = q->ndesc * sizeof(*q->entry);
q->entry = devm_kzalloc(dev->dev, size, GFP_KERNEL);
if (!q->entry)
return -ENOMEM;
/* clear descriptors */
for (i = 0; i < q->ndesc; i++)
q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
writel(q->desc_dma, &q->regs->desc_base);
writel(0, &q->regs->cpu_idx);
writel(0, &q->regs->dma_idx);
writel(q->ndesc, &q->regs->ring_size);
return 0;
}
static int
mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
struct mt76_queue_buf *buf, int nbufs, u32 info,
struct sk_buff *skb, void *txwi)
{
struct mt76_desc *desc;
u32 ctrl;
int i, idx = -1;
if (txwi) {
q->entry[q->head].txwi = DMA_DUMMY_DATA;
q->entry[q->head].skip_buf0 = true;
}
for (i = 0; i < nbufs; i += 2, buf += 2) {
u32 buf0 = buf[0].addr, buf1 = 0;
ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
if (i < nbufs - 1) {
buf1 = buf[1].addr;
ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
}
if (i == nbufs - 1)
ctrl |= MT_DMA_CTL_LAST_SEC0;
else if (i == nbufs - 2)
ctrl |= MT_DMA_CTL_LAST_SEC1;
idx = q->head;
q->head = (q->head + 1) % q->ndesc;
desc = &q->desc[idx];
WRITE_ONCE(desc->buf0, cpu_to_le32(buf0));
WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
WRITE_ONCE(desc->info, cpu_to_le32(info));
WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
q->queued++;
}
q->entry[idx].txwi = txwi;
q->entry[idx].skb = skb;
return idx;
}
static void
mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
struct mt76_queue_entry *prev_e)
{
struct mt76_queue_entry *e = &q->entry[idx];
__le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
u32 ctrl = le32_to_cpu(__ctrl);
if (!e->skip_buf0) {
__le32 addr = READ_ONCE(q->desc[idx].buf0);
u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);
dma_unmap_single(dev->dev, le32_to_cpu(addr), len,
DMA_TO_DEVICE);
}
if (!(ctrl & MT_DMA_CTL_LAST_SEC0)) {
__le32 addr = READ_ONCE(q->desc[idx].buf1);
u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN1, ctrl);
dma_unmap_single(dev->dev, le32_to_cpu(addr), len,
DMA_TO_DEVICE);
}
if (e->txwi == DMA_DUMMY_DATA)
e->txwi = NULL;
if (e->skb == DMA_DUMMY_DATA)
e->skb = NULL;
*prev_e = *e;
memset(e, 0, sizeof(*e));
}
static void
mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
{
writel(q->desc_dma, &q->regs->desc_base);
writel(q->ndesc, &q->regs->ring_size);
q->head = readl(&q->regs->dma_idx);
q->tail = q->head;
writel(q->head, &q->regs->cpu_idx);
}
static void
mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
{
struct mt76_sw_queue *sq = &dev->q_tx[qid];
struct mt76_queue *q = sq->q;
struct mt76_queue_entry entry;
unsigned int n_swq_queued[4] = {};
unsigned int n_queued = 0;
bool wake = false;
int i, last;
if (!q)
return;
if (flush)
last = -1;
else
last = readl(&q->regs->dma_idx);
while ((q->queued > n_queued) && q->tail != last) {
mt76_dma_tx_cleanup_idx(dev, q, q->tail, &entry);
if (entry.schedule)
n_swq_queued[entry.qid]++;
q->tail = (q->tail + 1) % q->ndesc;
n_queued++;
if (entry.skb)
dev->drv->tx_complete_skb(dev, qid, &entry);
if (entry.txwi) {
if (!(dev->drv->txwi_flags & MT_TXWI_NO_FREE))
mt76_put_txwi(dev, entry.txwi);
wake = !flush;
}
if (!flush && q->tail == last)
last = readl(&q->regs->dma_idx);
}
spin_lock_bh(&q->lock);
q->queued -= n_queued;
for (i = 0; i < ARRAY_SIZE(n_swq_queued); i++) {
if (!n_swq_queued[i])
continue;
dev->q_tx[i].swq_queued -= n_swq_queued[i];
}
if (flush)
mt76_dma_sync_idx(dev, q);
wake = wake && q->stopped &&
qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8;
if (wake)
q->stopped = false;
if (!q->queued)
wake_up(&dev->tx_wait);
spin_unlock_bh(&q->lock);
if (wake)
ieee80211_wake_queue(dev->hw, qid);
}
static void *
mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
int *len, u32 *info, bool *more)
{
struct mt76_queue_entry *e = &q->entry[idx];
struct mt76_desc *desc = &q->desc[idx];
dma_addr_t buf_addr;
void *buf = e->buf;
int buf_len = SKB_WITH_OVERHEAD(q->buf_size);
buf_addr = le32_to_cpu(READ_ONCE(desc->buf0));
if (len) {
u32 ctl = le32_to_cpu(READ_ONCE(desc->ctrl));
*len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctl);
*more = !(ctl & MT_DMA_CTL_LAST_SEC0);
}
if (info)
*info = le32_to_cpu(desc->info);
dma_unmap_single(dev->dev, buf_addr, buf_len, DMA_FROM_DEVICE);
e->buf = NULL;
return buf;
}
static void *
mt76_dma_dequeue(struct mt76_dev *dev, struct mt76_queue *q, bool flush,
int *len, u32 *info, bool *more)
{
int idx = q->tail;
*more = false;
if (!q->queued)
return NULL;
if (!flush && !(q->desc[idx].ctrl & cpu_to_le32(MT_DMA_CTL_DMA_DONE)))
return NULL;
q->tail = (q->tail + 1) % q->ndesc;
q->queued--;
return mt76_dma_get_buf(dev, q, idx, len, info, more);
}
static void
mt76_dma_kick_queue(struct mt76_dev *dev, struct mt76_queue *q)
{
writel(q->head, &q->regs->cpu_idx);
}
static int
mt76_dma_tx_queue_skb_raw(struct mt76_dev *dev, enum mt76_txq_id qid,
struct sk_buff *skb, u32 tx_info)
{
struct mt76_queue *q = dev->q_tx[qid].q;
struct mt76_queue_buf buf;
dma_addr_t addr;
addr = dma_map_single(dev->dev, skb->data, skb->len,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev, addr)))
return -ENOMEM;
buf.addr = addr;
buf.len = skb->len;
spin_lock_bh(&q->lock);
mt76_dma_add_buf(dev, q, &buf, 1, tx_info, skb, NULL);
mt76_dma_kick_queue(dev, q);
spin_unlock_bh(&q->lock);
return 0;
}
static int
mt76_dma_tx_queue_skb(struct mt76_dev *dev, enum mt76_txq_id qid,
struct sk_buff *skb, struct mt76_wcid *wcid,
struct ieee80211_sta *sta)
{
struct mt76_queue *q = dev->q_tx[qid].q;
struct mt76_tx_info tx_info = {
.skb = skb,
};
int len, n = 0, ret = -ENOMEM;
struct mt76_queue_entry e;
struct mt76_txwi_cache *t;
struct sk_buff *iter;
dma_addr_t addr;
u8 *txwi;
t = mt76_get_txwi(dev);
if (!t) {
ieee80211_free_txskb(dev->hw, skb);
return -ENOMEM;
}
txwi = mt76_get_txwi_ptr(dev, t);
skb->prev = skb->next = NULL;
if (dev->drv->tx_aligned4_skbs)
mt76_insert_hdr_pad(skb);
len = skb_headlen(skb);
addr = dma_map_single(dev->dev, skb->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev, addr)))
goto free;
tx_info.buf[n].addr = t->dma_addr;
tx_info.buf[n++].len = dev->drv->txwi_size;
tx_info.buf[n].addr = addr;
tx_info.buf[n++].len = len;
skb_walk_frags(skb, iter) {
if (n == ARRAY_SIZE(tx_info.buf))
goto unmap;
addr = dma_map_single(dev->dev, iter->data, iter->len,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev, addr)))
goto unmap;
tx_info.buf[n].addr = addr;
tx_info.buf[n++].len = iter->len;
}
tx_info.nbuf = n;
dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size,
DMA_TO_DEVICE);
ret = dev->drv->tx_prepare_skb(dev, txwi, qid, wcid, sta, &tx_info);
dma_sync_single_for_device(dev->dev, t->dma_addr, dev->drv->txwi_size,
DMA_TO_DEVICE);
if (ret < 0)
goto unmap;
if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
ret = -ENOMEM;
goto unmap;
}
return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf,
tx_info.info, tx_info.skb, t);
unmap:
for (n--; n > 0; n--)
dma_unmap_single(dev->dev, tx_info.buf[n].addr,
tx_info.buf[n].len, DMA_TO_DEVICE);
free:
e.skb = tx_info.skb;
e.txwi = t;
dev->drv->tx_complete_skb(dev, qid, &e);
mt76_put_txwi(dev, t);
return ret;
}
static int
mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
{
dma_addr_t addr;
void *buf;
int frames = 0;
int len = SKB_WITH_OVERHEAD(q->buf_size);
int offset = q->buf_offset;
int idx;
spin_lock_bh(&q->lock);
while (q->queued < q->ndesc - 1) {
struct mt76_queue_buf qbuf;
buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
if (!buf)
break;
addr = dma_map_single(dev->dev, buf, len, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(dev->dev, addr))) {
skb_free_frag(buf);
break;
}
qbuf.addr = addr + offset;
qbuf.len = len - offset;
idx = mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL);
frames++;
}
if (frames)
mt76_dma_kick_queue(dev, q);
spin_unlock_bh(&q->lock);
return frames;
}
static void
mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
{
struct page *page;
void *buf;
bool more;
spin_lock_bh(&q->lock);
do {
buf = mt76_dma_dequeue(dev, q, true, NULL, NULL, &more);
if (!buf)
break;
skb_free_frag(buf);
} while (1);
spin_unlock_bh(&q->lock);
if (!q->rx_page.va)
return;
page = virt_to_page(q->rx_page.va);
__page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
memset(&q->rx_page, 0, sizeof(q->rx_page));
}
static void
mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
{
struct mt76_queue *q = &dev->q_rx[qid];
int i;
for (i = 0; i < q->ndesc; i++)
q->desc[i].ctrl &= ~cpu_to_le32(MT_DMA_CTL_DMA_DONE);
mt76_dma_rx_cleanup(dev, q);
mt76_dma_sync_idx(dev, q);
mt76_dma_rx_fill(dev, q);
if (!q->rx_head)
return;
dev_kfree_skb(q->rx_head);
q->rx_head = NULL;
}
static void
mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
int len, bool more)
{
struct page *page = virt_to_head_page(data);
int offset = data - page_address(page);
struct sk_buff *skb = q->rx_head;
offset += q->buf_offset;
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len,
q->buf_size);
if (more)
return;
q->rx_head = NULL;
dev->drv->rx_skb(dev, q - dev->q_rx, skb);
}
static int
mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
{
int len, data_len, done = 0;
struct sk_buff *skb;
unsigned char *data;
bool more;
while (done < budget) {
u32 info;
data = mt76_dma_dequeue(dev, q, false, &len, &info, &more);
if (!data)
break;
if (q->rx_head)
data_len = q->buf_size;
else
data_len = SKB_WITH_OVERHEAD(q->buf_size);
if (data_len < len + q->buf_offset) {
dev_kfree_skb(q->rx_head);
q->rx_head = NULL;
skb_free_frag(data);
continue;
}
if (q->rx_head) {
mt76_add_fragment(dev, q, data, len, more);
continue;
}
skb = build_skb(data, q->buf_size);
if (!skb) {
skb_free_frag(data);
continue;
}
skb_reserve(skb, q->buf_offset);
if (q == &dev->q_rx[MT_RXQ_MCU]) {
u32 *rxfce = (u32 *)skb->cb;
*rxfce = info;
}
__skb_put(skb, len);
done++;
if (more) {
q->rx_head = skb;
continue;
}
dev->drv->rx_skb(dev, q - dev->q_rx, skb);
}
mt76_dma_rx_fill(dev, q);
return done;
}
static int
mt76_dma_rx_poll(struct napi_struct *napi, int budget)
{
struct mt76_dev *dev;
int qid, done = 0, cur;
dev = container_of(napi->dev, struct mt76_dev, napi_dev);
qid = napi - dev->napi;
rcu_read_lock();
do {
cur = mt76_dma_rx_process(dev, &dev->q_rx[qid], budget - done);
mt76_rx_poll_complete(dev, qid, napi);
done += cur;
} while (cur && done < budget);
rcu_read_unlock();
if (done < budget) {
napi_complete(napi);
dev->drv->rx_poll_complete(dev, qid);
}
return done;
}
static int
mt76_dma_init(struct mt76_dev *dev)
{
int i;
init_dummy_netdev(&dev->napi_dev);
for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
64);
mt76_dma_rx_fill(dev, &dev->q_rx[i]);
skb_queue_head_init(&dev->rx_skb[i]);
napi_enable(&dev->napi[i]);
}
return 0;
}
static const struct mt76_queue_ops mt76_dma_ops = {
.init = mt76_dma_init,
.alloc = mt76_dma_alloc_queue,
.tx_queue_skb_raw = mt76_dma_tx_queue_skb_raw,
.tx_queue_skb = mt76_dma_tx_queue_skb,
.tx_cleanup = mt76_dma_tx_cleanup,
.rx_reset = mt76_dma_rx_reset,
.kick = mt76_dma_kick_queue,
};
void mt76_dma_attach(struct mt76_dev *dev)
{
dev->queue_ops = &mt76_dma_ops;
}
EXPORT_SYMBOL_GPL(mt76_dma_attach);
void mt76_dma_cleanup(struct mt76_dev *dev)
{
int i;
netif_napi_del(&dev->tx_napi);
for (i = 0; i < ARRAY_SIZE(dev->q_tx); i++)
mt76_dma_tx_cleanup(dev, i, true);
for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
netif_napi_del(&dev->napi[i]);
mt76_dma_rx_cleanup(dev, &dev->q_rx[i]);
}
}
EXPORT_SYMBOL_GPL(mt76_dma_cleanup);