* [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG
@ 2023-11-29 6:17 Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 1/4] dma: factor out dma map generic implementations into file Ahmad Fatoum
` (4 more replies)
0 siblings, 5 replies; 8+ messages in thread
From: Ahmad Fatoum @ 2023-11-29 6:17 UTC (permalink / raw)
To: barebox; +Cc: Denis Orlov, str, lst
Cache invalidation issues around DMA accesses can be difficult to debug.
Motivated by recent fixes to the macb driver[1], let's add some optional
sanity checking to the DMA API inspired by the Linux CONFIG_DMA_API_DEBUG
option.
This would have caught the issue fixed by [1] in the macb driver and it
already caught a misuse of the API on the STM32MP system I tested it on.
Usage is simple: just enable it and ensure no warnings are printed.
All warnings are printed alongside the extents of the DMA buffer in
question and a stack trace at the time the check failed.
[1]: https://lore.barebox.org/barebox/20231128-v2023-08-0-topic-macb-v1-0-9faff73bc990@pengutronix.de/T/#t
Ahmad Fatoum (4):
dma: factor out dma map generic implementations into file
dma: add DMA API debugging support
mci: core: remove broken, unneeded write bounce buffer
mci: stm32_sdmmc2: correct usage of DMA API
common/Kconfig | 14 +++
drivers/dma/Makefile | 2 +
drivers/dma/debug.c | 183 +++++++++++++++++++++++++++++++++++++
drivers/dma/debug.h | 56 ++++++++++++
drivers/dma/map.c | 43 +++++++++
drivers/mci/mci-core.c | 10 +-
drivers/mci/stm32_sdmmc2.c | 41 +++++----
include/dma.h | 61 ++++---------
8 files changed, 340 insertions(+), 70 deletions(-)
create mode 100644 drivers/dma/debug.c
create mode 100644 drivers/dma/debug.h
create mode 100644 drivers/dma/map.c
--
2.39.2
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/4] dma: factor out dma map generic implementations into file
2023-11-29 6:17 [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Ahmad Fatoum
@ 2023-11-29 6:17 ` Ahmad Fatoum
2023-12-05 8:37 ` Sascha Hauer
2023-11-29 6:17 ` [PATCH 2/4] dma: add DMA API debugging support Ahmad Fatoum
` (3 subsequent siblings)
4 siblings, 1 reply; 8+ messages in thread
From: Ahmad Fatoum @ 2023-11-29 6:17 UTC (permalink / raw)
To: barebox; +Cc: Denis Orlov, str, lst, Ahmad Fatoum
In preparation for adding optional debugging code for the DMA mapping
API, move the definition out of the header file into a source file.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
drivers/dma/Makefile | 1 +
drivers/dma/map.c | 32 +++++++++++++++++++++++
include/dma.h | 61 ++++++++++++++------------------------------
3 files changed, 52 insertions(+), 42 deletions(-)
create mode 100644 drivers/dma/map.c
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 8e1aac9f6f67..e45476c23f14 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_HAS_DMA) += map.o
obj-$(CONFIG_MXS_APBH_DMA) += apbh_dma.o
diff --git a/drivers/dma/map.c b/drivers/dma/map.c
new file mode 100644
index 000000000000..270a4899fd05
--- /dev/null
+++ b/drivers/dma/map.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <dma.h>
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
+ size_t size, enum dma_data_direction dir)
+{
+ void *ptr = dma_to_cpu(dev, address);
+
+ arch_sync_dma_for_cpu(ptr, size, dir);
+}
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
+ size_t size, enum dma_data_direction dir)
+{
+ void *ptr = dma_to_cpu(dev, address);
+
+ arch_sync_dma_for_device(ptr, size, dir);
+}
+
+dma_addr_t dma_map_single(struct device *dev, void *ptr,
+ size_t size, enum dma_data_direction dir)
+{
+ arch_sync_dma_for_device(ptr, size, dir);
+
+ return cpu_to_dma(dev, ptr);
+}
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ dma_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
diff --git a/include/dma.h b/include/dma.h
index 2a09b747d1e2..6eef55a7325d 100644
--- a/include/dma.h
+++ b/include/dma.h
@@ -68,8 +68,6 @@ static inline void *dma_to_cpu(struct device *dev, dma_addr_t addr)
return phys_to_virt(addr);
}
-#ifndef __PBL__
-/* streaming DMA - implement the below calls to support HAS_DMA */
#ifndef arch_sync_dma_for_cpu
void arch_sync_dma_for_cpu(void *vaddr, size_t size,
enum dma_data_direction dir);
@@ -79,57 +77,36 @@ void arch_sync_dma_for_cpu(void *vaddr, size_t size,
void arch_sync_dma_for_device(void *vaddr, size_t size,
enum dma_data_direction dir);
#endif
+
+#ifndef __PBL__
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
+ size_t size, enum dma_data_direction dir);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
+ size_t size, enum dma_data_direction dir);
#else
-#ifndef arch_sync_dma_for_cpu
/*
* assumes buffers are in coherent/uncached memory, e.g. because
* MMU is only enabled in barebox_arm_entry which hasn't run yet.
*/
-static inline void arch_sync_dma_for_cpu(void *vaddr, size_t size,
- enum dma_data_direction dir)
+static inline void dma_sync_single_for_cpu(void *vaddr, size_t size,
+ enum dma_data_direction dir)
+{
+ barrier_data(vaddr);
+}
+
+static inline void dma_sync_single_for_device(void *vaddr, size_t size,
+ enum dma_data_direction dir)
{
barrier_data(vaddr);
}
#endif
-#ifndef arch_sync_dma_for_device
-static inline void arch_sync_dma_for_device(void *vaddr, size_t size,
- enum dma_data_direction dir)
-{
- barrier_data(vaddr);
-}
-#endif
-#endif
+dma_addr_t dma_map_single(struct device *dev, void *ptr,
+ size_t size, enum dma_data_direction dir);
-static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
- size_t size, enum dma_data_direction dir)
-{
- void *ptr = dma_to_cpu(dev, address);
-
- arch_sync_dma_for_cpu(ptr, size, dir);
-}
-
-static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
- size_t size, enum dma_data_direction dir)
-{
- void *ptr = dma_to_cpu(dev, address);
-
- arch_sync_dma_for_device(ptr, size, dir);
-}
-
-static inline dma_addr_t dma_map_single(struct device *dev, void *ptr,
- size_t size, enum dma_data_direction dir)
-{
- arch_sync_dma_for_device(ptr, size, dir);
-
- return cpu_to_dma(dev, ptr);
-}
-
-static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction dir)
-{
- dma_sync_single_for_cpu(dev, dma_addr, size, dir);
-}
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir);
#ifndef dma_alloc_coherent
void *dma_alloc_coherent(size_t size, dma_addr_t *dma_handle);
--
2.39.2
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/4] dma: add DMA API debugging support
2023-11-29 6:17 [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 1/4] dma: factor out dma map generic implementations into file Ahmad Fatoum
@ 2023-11-29 6:17 ` Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 3/4] mci: core: remove broken, unneeded write bounce buffer Ahmad Fatoum
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Ahmad Fatoum @ 2023-11-29 6:17 UTC (permalink / raw)
To: barebox; +Cc: Denis Orlov, str, lst, Ahmad Fatoum
For DMA_FROM_DEVICE, calling dma_sync_single_for_cpu
before arch_sync_dma_for_device has been called is wrong:
- Memory region is dirty in CPU cache
- Device writes packet into region
- CPU cache lines are written back
- Buffer memory is corrupted
In order to spot such issues, let's add a new CONFIG_DMA_API_DEBUG
option that warns when DMA API calls are made in the wrong order.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
common/Kconfig | 14 ++++
drivers/dma/Makefile | 1 +
drivers/dma/debug.c | 183 +++++++++++++++++++++++++++++++++++++++++++
drivers/dma/debug.h | 56 +++++++++++++
drivers/dma/map.c | 13 ++-
5 files changed, 266 insertions(+), 1 deletion(-)
create mode 100644 drivers/dma/debug.c
create mode 100644 drivers/dma/debug.h
diff --git a/common/Kconfig b/common/Kconfig
index 8bd8fa8df655..c8c23a8e03a2 100644
--- a/common/Kconfig
+++ b/common/Kconfig
@@ -1690,6 +1690,20 @@ config DEBUG_PROBES
Most consoles do not implement a remove callback to remain operable until
the very end. Consoles using DMA, however, must be removed.
+config DMA_API_DEBUG
+ bool "Enable debugging of DMA-API usage"
+ depends on HAS_DMA
+ help
+ Enable this option to debug the use of the DMA API by device drivers.
+ With this option you will be able to detect common bugs in device
+ drivers like double-freeing of DMA mappings or freeing mappings that
+ were never allocated.
+
+ This option causes a performance degradation. Use only if you want to
+ debug device drivers and dma interactions.
+
+ If unsure, say N.
+
config PBL_BREAK
bool "Execute software break on pbl start"
depends on ARM && (!CPU_32v4T && !ARCH_TEGRA)
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index e45476c23f14..b55c16e768d5 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HAS_DMA) += map.o
+obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_MXS_APBH_DMA) += apbh_dma.o
diff --git a/drivers/dma/debug.c b/drivers/dma/debug.c
new file mode 100644
index 000000000000..b3bfbff9b2f5
--- /dev/null
+++ b/drivers/dma/debug.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <dma.h>
+#include <linux/list.h>
+#include "debug.h"
+
+static LIST_HEAD(dma_mappings);
+
+struct dma_debug_entry {
+ struct list_head list;
+ struct device *dev;
+ dma_addr_t dev_addr;
+ size_t size;
+ int direction;
+};
+
+static const char *dir2name[] = {
+ [DMA_BIDIRECTIONAL] = "bidirectional",
+ [DMA_TO_DEVICE] = "to-device",
+ [DMA_FROM_DEVICE] = "from-device",
+ [DMA_NONE] = "none",
+};
+
+#define dma_dev_printf(level, args...) do { \
+ if (level > LOGLEVEL) \
+ break; \
+ dev_printf((level), args); \
+ if ((level) <= MSG_WARNING) \
+ dump_stack(); \
+} while (0)
+
+#define dma_dev_warn(args...) dma_dev_printf(MSG_WARNING, args)
+
+static void dma_printf(int level, struct dma_debug_entry *entry,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list va;
+
+ va_start(va, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &va;
+
+ dma_dev_printf(level, entry->dev, "%s mapping 0x%llx+0x%zx: %pV\n",
+ dir2name[(entry)->direction], (u64)(entry)->dev_addr,
+ (entry)->size, &vaf);
+
+ va_end(va);
+}
+
+#define dma_warn(args...) dma_printf(MSG_WARNING, args)
+#define dma_debug(args...) dma_printf(MSG_DEBUG, args)
+
+static inline int region_contains(struct dma_debug_entry *entry,
+ dma_addr_t buf_start, size_t buf_size)
+{
+ dma_addr_t dev_addr_end = entry->dev_addr + entry->size - 1;
+ dma_addr_t buf_end = buf_start + buf_size - 1;
+
+ /* Is the buffer completely within the mapping? */
+ if (entry->dev_addr <= buf_start && dev_addr_end >= buf_end)
+ return 1;
+
+ /* Does the buffer partially overlap the mapping? */
+ if (entry->dev_addr <= buf_end && dev_addr_end >= buf_start)
+ return -1;
+
+ return 0;
+}
+
+static struct dma_debug_entry *
+dma_debug_entry_find(struct device *dev, dma_addr_t dev_addr, size_t size)
+{
+ struct dma_debug_entry *entry;
+
+ /*
+ * DMA functions should be called with a device argument to support
+ * non-1:1 device mappings.
+ */
+ if (!dev)
+ dma_dev_warn(NULL, "unportable NULL device passed with buffer 0x%llx+0x%zx!\n",
+ (u64)dev_addr, size);
+
+ list_for_each_entry(entry, &dma_mappings, list) {
+ if (dev != entry->dev)
+ continue;
+
+ switch (region_contains(entry, dev_addr, size)) {
+ case 1:
+ return entry;
+ case -1:
+ /* The same device shouldn't have two mappings for the same address */
+ dma_warn(entry, "unexpected partial overlap looking for 0x%llx+0x%zx!\n",
+ (u64)dev_addr, size);
+ fallthrough;
+ case 0:
+ continue;
+ }
+ }
+
+ return NULL;
+}
+
+void debug_dma_map(struct device *dev, void *addr,
+ size_t size,
+ int direction, dma_addr_t dev_addr)
+{
+ struct dma_debug_entry *entry;
+
+ entry = dma_debug_entry_find(dev, dev_addr, size);
+ if (entry) {
+ /* The same device shouldn't have two mappings for the same address */
+ dma_warn(entry, "duplicate mapping\n");
+ return;
+ }
+
+ entry = xmalloc(sizeof(*entry));
+
+ entry->dev = dev;
+ entry->dev_addr = dev_addr;
+ entry->size = size;
+ entry->direction = direction;
+
+ list_add(&entry->list, &dma_mappings);
+
+ dma_debug(entry, "allocated\n");
+}
+
+void debug_dma_unmap(struct device *dev, dma_addr_t addr,
+ size_t size, int direction)
+{
+ struct dma_debug_entry *entry;
+
+ entry = dma_debug_entry_find(dev, addr, size);
+ if (!entry) {
+ /* Potential double free */
+ dma_dev_warn(dev, "Unmapping non-mapped %s buffer 0x%llx+0x%zx!\n",
+ dir2name[direction], (u64)addr, size);
+ return;
+ }
+
+ /* Mismatched size or direction may result in memory corruption */
+ if (entry->size != size)
+ dma_warn(entry, "mismatch unmapping 0x%zx bytes\n", size);
+ if (entry->direction != direction)
+ dma_warn(entry, "mismatch unmapping %s\n",
+ dir2name[direction]);
+
+ dma_debug(entry, "deallocating\n");
+ list_del(&entry->list);
+ free(entry);
+}
+
+void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ int direction)
+{
+ struct dma_debug_entry *entry;
+
+ entry = dma_debug_entry_find(dev, dma_handle, size);
+ if (!entry)
+ dma_dev_warn(dev, "sync for CPU of never-mapped %s buffer 0x%llx+0x%zx!\n",
+ dir2name[direction], (u64)dma_handle, size);
+}
+
+void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ struct dma_debug_entry *entry;
+
+ /*
+ * If dma_map_single was omitted, CPU cache may contain dirty cache lines
+ * for a buffer used for DMA. These lines may be evicted and written back
+ * after device DMA and before consumption by CPU, resulting in memory
+ * corruption
+ */
+ entry = dma_debug_entry_find(dev, dma_handle, size);
+ if (!entry)
+ dma_dev_warn(dev, "Syncing for device of never-mapped %s buffer 0x%llx+0x%zx!\n",
+ dir2name[direction], (u64)dma_handle, size);
+}
diff --git a/drivers/dma/debug.h b/drivers/dma/debug.h
new file mode 100644
index 000000000000..020bb5c19678
--- /dev/null
+++ b/drivers/dma/debug.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ */
+
+#ifndef _KERNEL_DMA_DEBUG_H
+#define _KERNEL_DMA_DEBUG_H
+
+#include <linux/types.h>
+
+struct device;
+
+#ifdef CONFIG_DMA_API_DEBUG
+extern void debug_dma_map(struct device *dev, void *addr,
+ size_t size,
+ int direction, dma_addr_t dma_addr);
+
+extern void debug_dma_unmap(struct device *dev, dma_addr_t addr,
+ size_t size, int direction);
+
+extern void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ int direction);
+
+extern void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction);
+
+#else /* CONFIG_DMA_API_DEBUG */
+static inline void debug_dma_map(struct device *dev, void *addr,
+ size_t size,
+ int direction, dma_addr_t dma_addr)
+{
+}
+
+static inline void debug_dma_unmap(struct device *dev, dma_addr_t addr,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+#endif /* CONFIG_DMA_API_DEBUG */
+#endif /* _KERNEL_DMA_DEBUG_H */
diff --git a/drivers/dma/map.c b/drivers/dma/map.c
index 270a4899fd05..e320f6aad4ac 100644
--- a/drivers/dma/map.c
+++ b/drivers/dma/map.c
@@ -1,11 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <dma.h>
+#include "debug.h"
void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
size_t size, enum dma_data_direction dir)
{
void *ptr = dma_to_cpu(dev, address);
+ debug_dma_sync_single_for_cpu(dev, address, size, dir);
+
arch_sync_dma_for_cpu(ptr, size, dir);
}
@@ -14,19 +17,27 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
{
void *ptr = dma_to_cpu(dev, address);
+ debug_dma_sync_single_for_device(dev, address, size, dir);
+
arch_sync_dma_for_device(ptr, size, dir);
}
dma_addr_t dma_map_single(struct device *dev, void *ptr,
size_t size, enum dma_data_direction dir)
{
+ dma_addr_t dma_addr = cpu_to_dma(dev, ptr);
+
+ debug_dma_map(dev, ptr, size, dir, dma_addr);
+
arch_sync_dma_for_device(ptr, size, dir);
- return cpu_to_dma(dev, ptr);
+ return dma_addr;
}
void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
dma_sync_single_for_cpu(dev, dma_addr, size, dir);
+
+ debug_dma_unmap(dev, dma_addr, size, dir);
}
--
2.39.2
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/4] mci: core: remove broken, unneeded write bounce buffer
2023-11-29 6:17 [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 1/4] dma: factor out dma map generic implementations into file Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 2/4] dma: add DMA API debugging support Ahmad Fatoum
@ 2023-11-29 6:17 ` Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 4/4] mci: stm32_sdmmc2: correct usage of DMA API Ahmad Fatoum
2023-12-05 7:52 ` [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Sascha Hauer
4 siblings, 0 replies; 8+ messages in thread
From: Ahmad Fatoum @ 2023-11-29 6:17 UTC (permalink / raw)
To: barebox; +Cc: Denis Orlov, str, lst, Ahmad Fatoum
mci_block_write uses a 512-byte long bounce buffer if the src argument
is not 4-byte aligned. This can never happen, as src is the address of a
block cache chunk, which is always aligned for DMA, and DMA alignment is
always a multiple of 4 bytes. Furthermore, the bounce buffer holds just a
single sector while the function may write multiple blocks, which would
result in an out-of-bounds read if that code weren't dead.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
drivers/mci/mci-core.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/drivers/mci/mci-core.c b/drivers/mci/mci-core.c
index 07eca96a9d61..280d08eb6253 100644
--- a/drivers/mci/mci-core.c
+++ b/drivers/mci/mci-core.c
@@ -218,7 +218,6 @@ static int mci_block_write(struct mci *mci, const void *src, int blocknum,
{
struct mci_cmd cmd;
struct mci_data data;
- const void *buf;
unsigned mmccmd;
int ret;
@@ -238,19 +237,12 @@ static int mci_block_write(struct mci *mci, const void *src, int blocknum,
else
mmccmd = MMC_CMD_WRITE_SINGLE_BLOCK;
- if ((unsigned long)src & 0x3) {
- memcpy(sector_buf, src, SECTOR_SIZE);
- buf = sector_buf;
- } else {
- buf = src;
- }
-
mci_setup_cmd(&cmd,
mmccmd,
mci->high_capacity != 0 ? blocknum : blocknum * mci->write_bl_len,
MMC_RSP_R1);
- data.src = buf;
+ data.src = src;
data.blocks = blocks;
data.blocksize = mci->write_bl_len;
data.flags = MMC_DATA_WRITE;
--
2.39.2
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 4/4] mci: stm32_sdmmc2: correct usage of DMA API
2023-11-29 6:17 [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Ahmad Fatoum
` (2 preceding siblings ...)
2023-11-29 6:17 ` [PATCH 3/4] mci: core: remove broken, unneeded write bounce buffer Ahmad Fatoum
@ 2023-11-29 6:17 ` Ahmad Fatoum
2023-12-05 7:52 ` [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Sascha Hauer
4 siblings, 0 replies; 8+ messages in thread
From: Ahmad Fatoum @ 2023-11-29 6:17 UTC (permalink / raw)
To: barebox; +Cc: Denis Orlov, str, lst, Ahmad Fatoum
The new CONFIG_DMA_API_DEBUG option correctly detects that
dma_sync_single_for_device is called without dma_map_single.
In the particular case of the STM32 SDMMC2 driver, this shouldn't lead
to errors as dma_sync_single_for_cpu is only called after successful
DMA, not before. In other cases though, dirty cache lines may be evicted
and written back to memory, just before dma_sync_single_for_cpu,
resulting in memory corruption.
Switch to using dma_map_single to fix this.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
drivers/mci/stm32_sdmmc2.c | 41 ++++++++++++++++++++------------------
1 file changed, 22 insertions(+), 19 deletions(-)
diff --git a/drivers/mci/stm32_sdmmc2.c b/drivers/mci/stm32_sdmmc2.c
index 1bfef1ccf0eb..84969a29d0f4 100644
--- a/drivers/mci/stm32_sdmmc2.c
+++ b/drivers/mci/stm32_sdmmc2.c
@@ -257,11 +257,12 @@ static void stm32_sdmmc2_pwron(struct stm32_sdmmc2_priv *priv)
udelay(DIV_ROUND_UP(74 * USEC_PER_SEC, priv->mci.clock));
}
-static void stm32_sdmmc2_start_data(struct stm32_sdmmc2_priv *priv,
+static int stm32_sdmmc2_start_data(struct stm32_sdmmc2_priv *priv,
struct mci_data *data, u32 data_length)
{
unsigned int num_bytes = data->blocks * data->blocksize;
- u32 data_ctrl, idmabase0;
+ dma_addr_t idmabase0;
+ u32 data_ctrl;
/* Configure the SDMMC DPSM (Data Path State Machine) */
data_ctrl = (__ilog2_u32(data->blocksize) <<
@@ -270,27 +271,27 @@ static void stm32_sdmmc2_start_data(struct stm32_sdmmc2_priv *priv,
if (data->flags & MMC_DATA_READ) {
data_ctrl |= SDMMC_DCTRL_DTDIR;
- idmabase0 = (u32)data->dest;
+ idmabase0 = dma_map_single(priv->dev, (void *)data->src, num_bytes,
+ DMA_FROM_DEVICE);
} else {
- idmabase0 = (u32)data->src;
+ idmabase0 = dma_map_single(priv->dev, (void *)data->src, num_bytes,
+ DMA_TO_DEVICE);
}
+ if (dma_mapping_error(priv->dev, idmabase0))
+ return DMA_ERROR_CODE;
+
/* Set the SDMMC DataLength value */
writel(data_length, priv->base + SDMMC_DLEN);
/* Write to SDMMC DCTRL */
writel(data_ctrl, priv->base + SDMMC_DCTRL);
- if (data->flags & MMC_DATA_WRITE)
- dma_sync_single_for_device(priv->dev, (unsigned long)idmabase0,
- num_bytes, DMA_TO_DEVICE);
- else
- dma_sync_single_for_device(priv->dev, (unsigned long)idmabase0,
- num_bytes, DMA_FROM_DEVICE);
-
/* Enable internal DMA */
writel(idmabase0, priv->base + SDMMC_IDMABASE0);
writel(SDMMC_IDMACTRL_IDMAEN, priv->base + SDMMC_IDMACTRL);
+
+ return idmabase0;
}
static void stm32_sdmmc2_start_cmd(struct stm32_sdmmc2_priv *priv,
@@ -415,7 +416,8 @@ static int stm32_sdmmc2_end_cmd(struct stm32_sdmmc2_priv *priv,
static int stm32_sdmmc2_end_data(struct stm32_sdmmc2_priv *priv,
struct mci_cmd *cmd,
- struct mci_data *data)
+ struct mci_data *data,
+ dma_addr_t dma_addr)
{
u32 mask = SDMMC_STA_DCRCFAIL | SDMMC_STA_DTIMEOUT |
SDMMC_STA_IDMATE | SDMMC_STA_DATAEND;
@@ -436,12 +438,10 @@ static int stm32_sdmmc2_end_data(struct stm32_sdmmc2_priv *priv,
return ret;
}
- if (data->flags & MMC_DATA_WRITE)
- dma_sync_single_for_cpu(priv->dev, (unsigned long)data->src,
- num_bytes, DMA_TO_DEVICE);
+ if (data->flags & MMC_DATA_READ)
+ dma_unmap_single(priv->dev, dma_addr, num_bytes, DMA_FROM_DEVICE);
else
- dma_sync_single_for_cpu(priv->dev, (unsigned long)data->dest,
- num_bytes, DMA_FROM_DEVICE);
+ dma_unmap_single(priv->dev, dma_addr, num_bytes, DMA_TO_DEVICE);
if (status & SDMMC_STA_DCRCFAIL) {
dev_err(priv->dev, "error SDMMC_STA_DCRCFAIL (0x%x) for cmd %d\n",
@@ -481,12 +481,15 @@ static int stm32_sdmmc2_send_cmd(struct mci_host *mci, struct mci_cmd *cmd,
{
struct stm32_sdmmc2_priv *priv = to_mci_host(mci);
u32 cmdat = data ? SDMMC_CMD_CMDTRANS : 0;
+ dma_addr_t dma_addr = DMA_ERROR_CODE;
u32 data_length = 0;
int ret;
if (data) {
data_length = data->blocks * data->blocksize;
- stm32_sdmmc2_start_data(priv, data, data_length);
+ dma_addr = stm32_sdmmc2_start_data(priv, data, data_length);
+ if (dma_addr == DMA_ERROR_CODE)
+ return -EFAULT;
}
stm32_sdmmc2_start_cmd(priv, cmd, cmdat, data_length);
@@ -497,7 +500,7 @@ static int stm32_sdmmc2_send_cmd(struct mci_host *mci, struct mci_cmd *cmd,
ret = stm32_sdmmc2_end_cmd(priv, cmd);
if (data && !ret)
- ret = stm32_sdmmc2_end_data(priv, cmd, data);
+ ret = stm32_sdmmc2_end_data(priv, cmd, data, dma_addr);
/* Clear flags */
writel(SDMMC_ICR_STATIC_FLAGS, priv->base + SDMMC_ICR);
--
2.39.2
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG
2023-11-29 6:17 [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Ahmad Fatoum
` (3 preceding siblings ...)
2023-11-29 6:17 ` [PATCH 4/4] mci: stm32_sdmmc2: correct usage of DMA API Ahmad Fatoum
@ 2023-12-05 7:52 ` Sascha Hauer
4 siblings, 0 replies; 8+ messages in thread
From: Sascha Hauer @ 2023-12-05 7:52 UTC (permalink / raw)
To: Ahmad Fatoum; +Cc: barebox, Denis Orlov, str, lst
On Wed, Nov 29, 2023 at 07:17:54AM +0100, Ahmad Fatoum wrote:
> Cache invalidation issues around DMA accesses can be difficult to debug.
> Motivated by recent fixes to the macb driver[1], let's add some optional
> sanity checking to the DMA API inspired by the Linux CONFIG_DMA_API_DEBUG
> option.
>
> This would have caught the issue fixed by [1] in the macb driver and it
> already caught a misuse of the API on the STM32MP system I tested it on.
>
> Usage is simple: just enable it and ensure no warnings are printed.
> All warnings are printed alongside the extents of the DMA buffer in
> question and a stack trace at the time the check failed.
>
> [1]: https://lore.barebox.org/barebox/20231128-v2023-08-0-topic-macb-v1-0-9faff73bc990@pengutronix.de/T/#t
>
> Ahmad Fatoum (4):
> dma: factor out dma map generic implementations into file
> dma: add DMA API debugging support
> mci: core: remove broken, unneeded write bounce buffer
> mci: stm32_sdmmc2: correct usage of DMA API
Applied, thanks
Sascha
>
> common/Kconfig | 14 +++
> drivers/dma/Makefile | 2 +
> drivers/dma/debug.c | 183 +++++++++++++++++++++++++++++++++++++
> drivers/dma/debug.h | 56 ++++++++++++
> drivers/dma/map.c | 43 +++++++++
> drivers/mci/mci-core.c | 10 +-
> drivers/mci/stm32_sdmmc2.c | 41 +++++----
> include/dma.h | 61 ++++---------
> 8 files changed, 340 insertions(+), 70 deletions(-)
> create mode 100644 drivers/dma/debug.c
> create mode 100644 drivers/dma/debug.h
> create mode 100644 drivers/dma/map.c
>
> --
> 2.39.2
>
>
>
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] dma: factor out dma map generic implementations into file
2023-11-29 6:17 ` [PATCH 1/4] dma: factor out dma map generic implementations into file Ahmad Fatoum
@ 2023-12-05 8:37 ` Sascha Hauer
2023-12-05 8:42 ` Ahmad Fatoum
0 siblings, 1 reply; 8+ messages in thread
From: Sascha Hauer @ 2023-12-05 8:37 UTC (permalink / raw)
To: Ahmad Fatoum; +Cc: barebox, Denis Orlov, str, lst
On Wed, Nov 29, 2023 at 07:17:55AM +0100, Ahmad Fatoum wrote:
> In preparation for adding optional debugging code for the DMA mapping
> API, move the definition out of the header file into a source file.
>
> Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
> ---
> drivers/dma/Makefile | 1 +
> drivers/dma/map.c | 32 +++++++++++++++++++++++
> include/dma.h | 61 ++++++++++++++------------------------------
> 3 files changed, 52 insertions(+), 42 deletions(-)
> create mode 100644 drivers/dma/map.c
>
> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index 8e1aac9f6f67..e45476c23f14 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
> @@ -1,2 +1,3 @@
> # SPDX-License-Identifier: GPL-2.0-only
> +obj-$(CONFIG_HAS_DMA) += map.o
> obj-$(CONFIG_MXS_APBH_DMA) += apbh_dma.o
> diff --git a/drivers/dma/map.c b/drivers/dma/map.c
> new file mode 100644
> index 000000000000..270a4899fd05
> --- /dev/null
> +++ b/drivers/dma/map.c
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <dma.h>
> +
> +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
> + size_t size, enum dma_data_direction dir)
> +{
> + void *ptr = dma_to_cpu(dev, address);
> +
> + arch_sync_dma_for_cpu(ptr, size, dir);
> +}
> +
> +void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
> + size_t size, enum dma_data_direction dir)
> +{
> + void *ptr = dma_to_cpu(dev, address);
> +
> + arch_sync_dma_for_device(ptr, size, dir);
> +}
> +
> +dma_addr_t dma_map_single(struct device *dev, void *ptr,
> + size_t size, enum dma_data_direction dir)
> +{
> + arch_sync_dma_for_device(ptr, size, dir);
> +
> + return cpu_to_dma(dev, ptr);
> +}
> +
> +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
> + size_t size, enum dma_data_direction dir)
> +{
> + dma_sync_single_for_cpu(dev, dma_addr, size, dir);
> +}
> diff --git a/include/dma.h b/include/dma.h
> index 2a09b747d1e2..6eef55a7325d 100644
> --- a/include/dma.h
> +++ b/include/dma.h
> @@ -68,8 +68,6 @@ static inline void *dma_to_cpu(struct device *dev, dma_addr_t addr)
> return phys_to_virt(addr);
> }
>
> -#ifndef __PBL__
> -/* streaming DMA - implement the below calls to support HAS_DMA */
> #ifndef arch_sync_dma_for_cpu
> void arch_sync_dma_for_cpu(void *vaddr, size_t size,
> enum dma_data_direction dir);
> @@ -79,57 +77,36 @@ void arch_sync_dma_for_cpu(void *vaddr, size_t size,
> void arch_sync_dma_for_device(void *vaddr, size_t size,
> enum dma_data_direction dir);
> #endif
> +
> +#ifndef __PBL__
> +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
> + size_t size, enum dma_data_direction dir);
> +
> +void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
> + size_t size, enum dma_data_direction dir);
> #else
> -#ifndef arch_sync_dma_for_cpu
> /*
> * assumes buffers are in coherent/uncached memory, e.g. because
> * MMU is only enabled in barebox_arm_entry which hasn't run yet.
> */
> -static inline void arch_sync_dma_for_cpu(void *vaddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void dma_sync_single_for_cpu(void *vaddr, size_t size,
> + enum dma_data_direction dir)
> +{
> + barrier_data(vaddr);
> +}
> +
> +static inline void dma_sync_single_for_device(void *vaddr, size_t size,
> + enum dma_data_direction dir)
> {
> barrier_data(vaddr);
> }
The prototypes are wrong here. Should be
static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
size_t size, enum dma_data_direction dir)
Fixed this.
Sascha
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] dma: factor out dma map generic implementations into file
2023-12-05 8:37 ` Sascha Hauer
@ 2023-12-05 8:42 ` Ahmad Fatoum
0 siblings, 0 replies; 8+ messages in thread
From: Ahmad Fatoum @ 2023-12-05 8:42 UTC (permalink / raw)
To: Sascha Hauer; +Cc: barebox, Denis Orlov, str, lst
On 05.12.23 09:37, Sascha Hauer wrote:
> On Wed, Nov 29, 2023 at 07:17:55AM +0100, Ahmad Fatoum wrote:
>> In preparation for adding optional debugging code for the DMA mapping
>> API, move the definition out of the header file into a source file.
>>
>> Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
>> ---
>> drivers/dma/Makefile | 1 +
>> drivers/dma/map.c | 32 +++++++++++++++++++++++
>> include/dma.h | 61 ++++++++++++++------------------------------
>> 3 files changed, 52 insertions(+), 42 deletions(-)
>> create mode 100644 drivers/dma/map.c
>>
>> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
>> index 8e1aac9f6f67..e45476c23f14 100644
>> --- a/drivers/dma/Makefile
>> +++ b/drivers/dma/Makefile
>> @@ -1,2 +1,3 @@
>> # SPDX-License-Identifier: GPL-2.0-only
>> +obj-$(CONFIG_HAS_DMA) += map.o
>> obj-$(CONFIG_MXS_APBH_DMA) += apbh_dma.o
>> diff --git a/drivers/dma/map.c b/drivers/dma/map.c
>> new file mode 100644
>> index 000000000000..270a4899fd05
>> --- /dev/null
>> +++ b/drivers/dma/map.c
>> @@ -0,0 +1,32 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +#include <dma.h>
>> +
>> +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
>> + size_t size, enum dma_data_direction dir)
>> +{
>> + void *ptr = dma_to_cpu(dev, address);
>> +
>> + arch_sync_dma_for_cpu(ptr, size, dir);
>> +}
>> +
>> +void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
>> + size_t size, enum dma_data_direction dir)
>> +{
>> + void *ptr = dma_to_cpu(dev, address);
>> +
>> + arch_sync_dma_for_device(ptr, size, dir);
>> +}
>> +
>> +dma_addr_t dma_map_single(struct device *dev, void *ptr,
>> + size_t size, enum dma_data_direction dir)
>> +{
>> + arch_sync_dma_for_device(ptr, size, dir);
>> +
>> + return cpu_to_dma(dev, ptr);
>> +}
>> +
>> +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
>> + size_t size, enum dma_data_direction dir)
>> +{
>> + dma_sync_single_for_cpu(dev, dma_addr, size, dir);
>> +}
>> diff --git a/include/dma.h b/include/dma.h
>> index 2a09b747d1e2..6eef55a7325d 100644
>> --- a/include/dma.h
>> +++ b/include/dma.h
>> @@ -68,8 +68,6 @@ static inline void *dma_to_cpu(struct device *dev, dma_addr_t addr)
>> return phys_to_virt(addr);
>> }
>>
>> -#ifndef __PBL__
>> -/* streaming DMA - implement the below calls to support HAS_DMA */
>> #ifndef arch_sync_dma_for_cpu
>> void arch_sync_dma_for_cpu(void *vaddr, size_t size,
>> enum dma_data_direction dir);
>> @@ -79,57 +77,36 @@ void arch_sync_dma_for_cpu(void *vaddr, size_t size,
>> void arch_sync_dma_for_device(void *vaddr, size_t size,
>> enum dma_data_direction dir);
>> #endif
>> +
>> +#ifndef __PBL__
>> +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t address,
>> + size_t size, enum dma_data_direction dir);
>> +
>> +void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
>> + size_t size, enum dma_data_direction dir);
>> #else
>> -#ifndef arch_sync_dma_for_cpu
>> /*
>> * assumes buffers are in coherent/uncached memory, e.g. because
>> * MMU is only enabled in barebox_arm_entry which hasn't run yet.
>> */
>> -static inline void arch_sync_dma_for_cpu(void *vaddr, size_t size,
>> - enum dma_data_direction dir)
>> +static inline void dma_sync_single_for_cpu(void *vaddr, size_t size,
>> + enum dma_data_direction dir)
>> +{
>> + barrier_data(vaddr);
>> +}
>> +
>> +static inline void dma_sync_single_for_device(void *vaddr, size_t size,
>> + enum dma_data_direction dir)
>> {
>> barrier_data(vaddr);
>> }
>
> The prototypes are wrong here. Should be
>
> static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t address,
> size_t size, enum dma_data_direction dir)
>
> Fixed this.
Thanks,
Ahmad
>
> Sascha
>
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2023-12-05 8:43 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-29 6:17 [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 1/4] dma: factor out dma map generic implementations into file Ahmad Fatoum
2023-12-05 8:37 ` Sascha Hauer
2023-12-05 8:42 ` Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 2/4] dma: add DMA API debugging support Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 3/4] mci: core: remove broken, unneeded write bounce buffer Ahmad Fatoum
2023-11-29 6:17 ` [PATCH 4/4] mci: stm32_sdmmc2: correct usage of DMA API Ahmad Fatoum
2023-12-05 7:52 ` [PATCH 0/4] dma: catch mistakes with CONFIG_DMA_API_DEBUG Sascha Hauer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox