* [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code
@ 2025-08-06 12:36 Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 01/22] ARM: mmu: introduce new maptype_t type Ahmad Fatoum
` (22 more replies)
0 siblings, 23 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox
For historical reasons, there is duplication and there are subtle differences
between the 32-bit and 64-bit MMU code.
Let's refactor the code for more similarity, implement flush_cacheable_pages
for 32-bit, and prepare for observing break-before-make requirements.
Ahmad Fatoum (22):
ARM: mmu: introduce new maptype_t type
ARM: mmu: compare only lowest 16 bits for map type
ARM: mmu: prefix pre-MMU functions with early_
ARM: mmu: panic when alloc_pte fails
ARM: mmu32: introduce new mmu_addr_t type
ARM: mmu: provide zero page control in PBL
ARM: mmu: print map type as string
ARM: mmu64: rename create_sections to __arch_remap_range
ARM: mmu: move get_pte_attrs call into __arch_remap_range
ARM: mmu64: print debug message in __arch_remap_range
ARM: mmu: make force_pages a maptype_t flag
ARM: mmu64: move granule_size to the top of the file
ARM: mmu64: fix benign off-by-one in flush_cacheable_pages
ARM: mmu64: make flush_cacheable_pages less 64-bit dependent
ARM: mmu64: allow asserting last level page in __find_pte
ARM: mmu64: rename __find_pte to find_pte
ARM: mmu32: rework find_pte to have ARM64 find_pte semantics
ARM: mmu64: factor out flush_cacheable_pages for reusability
ARM: mmu32: flush only cacheable pages on remap
ARM: mmu32: factor out set_pte_range helper
ARM: mmu64: factor out set_pte_range helper
ARM: mmu: define dma_alloc_writecombine in common code
arch/arm/cpu/Makefile | 2 +-
arch/arm/cpu/flush_cacheable_pages.h | 77 +++++
arch/arm/cpu/mmu-common.c | 23 +-
arch/arm/cpu/mmu-common.h | 29 +-
arch/arm/cpu/mmu_32.c | 237 +++++++++-----
arch/arm/cpu/mmu_32.h | 2 +
arch/arm/cpu/mmu_64.c | 445 ++++++++++++---------------
arch/arm/cpu/mmu_64.h | 2 +
arch/arm/include/asm/mmu.h | 2 +-
arch/powerpc/cpu-85xx/mmu.c | 4 +-
arch/powerpc/include/asm/mmu.h | 2 +-
commands/memtest.c | 8 +-
include/linux/types.h | 2 +
include/mmu.h | 25 +-
include/zero_page.h | 2 +-
15 files changed, 509 insertions(+), 353 deletions(-)
create mode 100644 arch/arm/cpu/flush_cacheable_pages.h
--
2.39.5
* [PATCH 01/22] ARM: mmu: introduce new maptype_t type
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 02/22] ARM: mmu: compare only lowest 16 bits for map type Ahmad Fatoum
` (21 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The unsigned flags parameter in many of the MMU functions is ambiguous,
as it's used both for the generic map type and for the actual
architecture-specific page table entry attributes.
Clear this up by using maptype_t as the type for all generic mapping types
and by rewording the parameter name where appropriate.
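As an illustration (the functions are the existing ones touched below; nothing
new is introduced here), the same "unsigned" used to carry two different kinds
of values:

    /* generic map type, as requested by callers */
    remap_range(buf, size, MAP_UNCACHED);

    /* architecture-specific PTE attribute bits derived from it */
    u32 pte_flags = get_pte_flags(MAP_UNCACHED);

After this patch, the former consistently uses the new maptype_t typedef,
while the latter keeps its architecture-specific integer types.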
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu-common.c | 4 ++--
arch/arm/cpu/mmu-common.h | 4 ++--
arch/arm/cpu/mmu_32.c | 10 +++++-----
arch/arm/cpu/mmu_64.c | 16 ++++++++--------
arch/arm/include/asm/mmu.h | 2 +-
arch/powerpc/cpu-85xx/mmu.c | 2 +-
arch/powerpc/include/asm/mmu.h | 2 +-
commands/memtest.c | 8 ++++----
include/linux/types.h | 2 ++
include/mmu.h | 8 ++++----
10 files changed, 30 insertions(+), 28 deletions(-)
diff --git a/arch/arm/cpu/mmu-common.c b/arch/arm/cpu/mmu-common.c
index 1de20d931876..2e61dc36b7cc 100644
--- a/arch/arm/cpu/mmu-common.c
+++ b/arch/arm/cpu/mmu-common.c
@@ -24,7 +24,7 @@ void arch_sync_dma_for_cpu(void *vaddr, size_t size,
}
void *dma_alloc_map(struct device *dev,
- size_t size, dma_addr_t *dma_handle, unsigned flags)
+ size_t size, dma_addr_t *dma_handle, maptype_t map_type)
{
void *ret;
@@ -36,7 +36,7 @@ void *dma_alloc_map(struct device *dev,
memset(ret, 0, size);
dma_flush_range(ret, size);
- remap_range(ret, size, flags);
+ remap_range(ret, size, map_type);
return ret;
}
diff --git a/arch/arm/cpu/mmu-common.h b/arch/arm/cpu/mmu-common.h
index f76c7c4c38d6..a545958b5cc2 100644
--- a/arch/arm/cpu/mmu-common.h
+++ b/arch/arm/cpu/mmu-common.h
@@ -17,11 +17,11 @@ struct device;
void dma_inv_range(void *ptr, size_t size);
void dma_flush_range(void *ptr, size_t size);
-void *dma_alloc_map(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned flags);
+void *dma_alloc_map(struct device *dev, size_t size, dma_addr_t *dma_handle, maptype_t map_type);
void setup_trap_pages(void);
void __mmu_init(bool mmu_on);
-static inline unsigned arm_mmu_maybe_skip_permissions(unsigned map_type)
+static inline maptype_t arm_mmu_maybe_skip_permissions(maptype_t map_type)
{
if (IS_ENABLED(CONFIG_ARM_MMU_PERMISSIONS))
return map_type;
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 985a063bbdda..8d1343b5d7d7 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -223,7 +223,7 @@ static u32 pte_flags_to_pmd(u32 pte)
return pmd;
}
-static uint32_t get_pte_flags(int map_type)
+static uint32_t get_pte_flags(maptype_t map_type)
{
if (cpu_architecture() >= CPU_ARCH_ARMv7) {
switch (map_type) {
@@ -261,13 +261,13 @@ static uint32_t get_pte_flags(int map_type)
}
}
-static uint32_t get_pmd_flags(int map_type)
+static uint32_t get_pmd_flags(maptype_t map_type)
{
return pte_flags_to_pmd(get_pte_flags(map_type));
}
static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t size,
- unsigned map_type, bool force_pages)
+ maptype_t map_type, bool force_pages)
{
u32 virt_addr = (u32)_virt_addr;
u32 pte_flags, pmd_flags;
@@ -364,12 +364,12 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
tlb_invalidate();
}
-static void early_remap_range(u32 addr, size_t size, unsigned map_type, bool force_pages)
+static void early_remap_range(u32 addr, size_t size, maptype_t map_type, bool force_pages)
{
__arch_remap_range((void *)addr, addr, size, map_type, force_pages);
}
-int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned map_type)
+int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
{
map_type = arm_mmu_maybe_skip_permissions(map_type);
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index e7d2e9697a7e..ad96bda702b8 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -287,9 +287,9 @@ static void flush_cacheable_pages(void *start, size_t size)
v8_flush_dcache_range(flush_start, flush_end);
}
-static unsigned long get_pte_attrs(unsigned flags)
+static unsigned long get_pte_attrs(maptype_t map_type)
{
- switch (flags) {
+ switch (map_type) {
case MAP_CACHED:
return attrs_xn() | CACHED_MEM;
case MAP_UNCACHED:
@@ -309,9 +309,9 @@ static unsigned long get_pte_attrs(unsigned flags)
}
}
-static void early_remap_range(uint64_t addr, size_t size, unsigned flags, bool force_pages)
+static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type, bool force_pages)
{
- unsigned long attrs = get_pte_attrs(flags);
+ unsigned long attrs = get_pte_attrs(map_type);
if (WARN_ON(attrs == ~0UL))
return;
@@ -319,18 +319,18 @@ static void early_remap_range(uint64_t addr, size_t size, unsigned flags, bool f
create_sections(addr, addr, size, attrs, force_pages);
}
-int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned flags)
+int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
{
unsigned long attrs;
- flags = arm_mmu_maybe_skip_permissions(flags);
+ map_type = arm_mmu_maybe_skip_permissions(map_type);
- attrs = get_pte_attrs(flags);
+ attrs = get_pte_attrs(map_type);
if (attrs == ~0UL)
return -EINVAL;
- if (flags != MAP_CACHED)
+ if (map_type != MAP_CACHED)
flush_cacheable_pages(virt_addr, size);
create_sections((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs, false);
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 5538cd3558e8..eef6c53b5912 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -23,7 +23,7 @@ static inline void setup_dma_coherent(unsigned long offset)
#ifdef CONFIG_MMU
#define ARCH_HAS_REMAP
#define MAP_ARCH_DEFAULT MAP_CACHED
-int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned flags);
+int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type);
void *map_io_sections(unsigned long physaddr, void *start, size_t size);
#else
#define MAP_ARCH_DEFAULT MAP_UNCACHED
diff --git a/arch/powerpc/cpu-85xx/mmu.c b/arch/powerpc/cpu-85xx/mmu.c
index 3bd75281eb98..5fe9ba9db6d8 100644
--- a/arch/powerpc/cpu-85xx/mmu.c
+++ b/arch/powerpc/cpu-85xx/mmu.c
@@ -17,7 +17,7 @@
#include <mmu.h>
#include <mach/mmu.h>
-int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned flags)
+int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t flags)
{
uint32_t ptr, start, tsize, valid, wimge, pte_flags;
unsigned long epn;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 10b15a47b9aa..288c5903a277 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -563,7 +563,7 @@ extern int write_bat(ppc_bat_t bat, unsigned long upper, unsigned long lower);
#ifdef CONFIG_MMU
#define ARCH_HAS_REMAP
-int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned flags);
+int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t flags);
#endif
#endif
diff --git a/commands/memtest.c b/commands/memtest.c
index 9fa148b3aa41..cc7e3c29ad06 100644
--- a/commands/memtest.c
+++ b/commands/memtest.c
@@ -13,7 +13,7 @@
#include <mmu.h>
static int do_test_one_area(struct mem_test_resource *r, int bus_only,
- unsigned cache_flag)
+ maptype_t cache_flag)
{
unsigned flags = MEMTEST_VERBOSE;
int ret;
@@ -39,7 +39,7 @@ static int do_test_one_area(struct mem_test_resource *r, int bus_only,
}
static int do_memtest_thorough(struct list_head *memtest_regions,
- int bus_only, unsigned cache_flag)
+ int bus_only, maptype_t cache_flag)
{
struct mem_test_resource *r;
int ret;
@@ -54,7 +54,7 @@ static int do_memtest_thorough(struct list_head *memtest_regions,
}
static int do_memtest_biggest(struct list_head *memtest_regions,
- int bus_only, unsigned cache_flag)
+ int bus_only, maptype_t cache_flag)
{
struct mem_test_resource *r;
@@ -70,7 +70,7 @@ static int do_memtest(int argc, char *argv[])
int bus_only = 0, ret, opt;
uint32_t i, max_i = 1;
struct list_head memtest_used_regions;
- int (*memtest)(struct list_head *, int, unsigned);
+ int (*memtest)(struct list_head *, int, maptype_t);
int cached = 0, uncached = 0;
memtest = do_memtest_biggest;
diff --git a/include/linux/types.h b/include/linux/types.h
index c5e38fee9595..93e7fe46295f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -198,6 +198,8 @@ typedef u32 phys_size_t;
typedef phys_addr_t resource_size_t;
+typedef unsigned maptype_t;
+
struct ustat {
__kernel_daddr_t f_tfree;
__kernel_ino_t f_tinode;
diff --git a/include/mmu.h b/include/mmu.h
index 17c04d2fa05f..db8453f58521 100644
--- a/include/mmu.h
+++ b/include/mmu.h
@@ -27,9 +27,9 @@
#ifndef ARCH_HAS_REMAP
static inline int arch_remap_range(void *virt_addr, phys_addr_t phys_addr,
- size_t size, unsigned flags)
+ size_t size, maptype_t map_type)
{
- if (flags == MAP_ARCH_DEFAULT && phys_addr == virt_to_phys(virt_addr))
+ if (map_type == MAP_ARCH_DEFAULT && phys_addr == virt_to_phys(virt_addr))
return 0;
return -EINVAL;
@@ -46,9 +46,9 @@ static inline bool arch_can_remap(void)
}
#endif
-static inline int remap_range(void *start, size_t size, unsigned flags)
+static inline int remap_range(void *start, size_t size, maptype_t map_type)
{
- return arch_remap_range(start, virt_to_phys(start), size, flags);
+ return arch_remap_range(start, virt_to_phys(start), size, map_type);
}
#ifdef CONFIG_MMUINFO
--
2.39.5
* [PATCH 02/22] ARM: mmu: compare only lowest 16 bits for map type
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 01/22] ARM: mmu: introduce new maptype_t type Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 03/22] ARM: mmu: prefix pre-MMU functions with early_ Ahmad Fatoum
` (20 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
Regions remapped as MAP_CODE are still cacheable, even if they aren't
MAP_CACHED. To handle that, and to support future use of flag bits in
maptype_t, let's limit the existing memory type enumeration to the lower
16 bits and use a MAP_CODE/MAP_CACHED-aware comparison helper.
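A rough sketch of the resulting semantics (the macros are from the hunks
below; the flag bit in the last example is purely illustrative):

    #define MAP_TYPE_MASK	0xFFFF
    #define MAP_ARCH(x)		((u16)~(x))	/* ARCH_MAP_CACHED_RWX == MAP_ARCH(2) == 0xfffd */

    maptype_is_compatible(MAP_CACHED, MAP_CACHED);	/* true: same type */
    maptype_is_compatible(MAP_CODE, MAP_CACHED);	/* true: code is still cacheable */
    maptype_is_compatible(MAP_UNCACHED, MAP_CACHED);	/* false */

    /* bits above MAP_TYPE_MASK are reserved for future flags and are
     * ignored by the comparison */
    maptype_is_compatible(MAP_CACHED | BIT(31), MAP_CACHED);	/* true */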
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu-common.h | 6 +++---
arch/arm/cpu/mmu_32.c | 10 +++++-----
arch/arm/cpu/mmu_64.c | 4 ++--
arch/powerpc/cpu-85xx/mmu.c | 2 +-
include/mmu.h | 19 ++++++++++++++++++-
5 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/arch/arm/cpu/mmu-common.h b/arch/arm/cpu/mmu-common.h
index a545958b5cc2..e9005dfae766 100644
--- a/arch/arm/cpu/mmu-common.h
+++ b/arch/arm/cpu/mmu-common.h
@@ -10,8 +10,8 @@
#include <linux/kernel.h>
#include <linux/sizes.h>
-#define ARCH_MAP_CACHED_RWX ((unsigned)-2)
-#define ARCH_MAP_CACHED_RO ((unsigned)-3)
+#define ARCH_MAP_CACHED_RWX MAP_ARCH(2)
+#define ARCH_MAP_CACHED_RO MAP_ARCH(3)
struct device;
@@ -26,7 +26,7 @@ static inline maptype_t arm_mmu_maybe_skip_permissions(maptype_t map_type)
if (IS_ENABLED(CONFIG_ARM_MMU_PERMISSIONS))
return map_type;
- switch (map_type) {
+ switch (map_type & MAP_TYPE_MASK) {
case MAP_CODE:
case MAP_CACHED:
case ARCH_MAP_CACHED_RO:
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 8d1343b5d7d7..ae86c27e7e27 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -226,7 +226,7 @@ static u32 pte_flags_to_pmd(u32 pte)
static uint32_t get_pte_flags(maptype_t map_type)
{
if (cpu_architecture() >= CPU_ARCH_ARMv7) {
- switch (map_type) {
+ switch (map_type & MAP_TYPE_MASK) {
case ARCH_MAP_CACHED_RWX:
return PTE_FLAGS_CACHED_V7_RWX;
case ARCH_MAP_CACHED_RO:
@@ -244,7 +244,7 @@ static uint32_t get_pte_flags(maptype_t map_type)
return 0x0;
}
} else {
- switch (map_type) {
+ switch (map_type & MAP_TYPE_MASK) {
case ARCH_MAP_CACHED_RO:
case MAP_CODE:
return PTE_FLAGS_CACHED_RO_V4;
@@ -300,7 +300,7 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
*/
chunk = PGDIR_SIZE;
val = phys_addr | pmd_flags;
- if (map_type != MAP_FAULT)
+ if (!maptype_is_compatible(map_type, MAP_FAULT))
val |= PMD_TYPE_SECT;
// TODO break-before-make missing
set_pte(pgd, val);
@@ -346,7 +346,7 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
val = phys_addr + i * PAGE_SIZE;
val |= pte_flags;
- if (map_type != MAP_FAULT)
+ if (!maptype_is_compatible(map_type, MAP_FAULT))
val |= PTE_TYPE_SMALL;
// TODO break-before-make missing
@@ -375,7 +375,7 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
__arch_remap_range(virt_addr, phys_addr, size, map_type, false);
- if (map_type == MAP_UNCACHED)
+ if (maptype_is_compatible(map_type, MAP_UNCACHED))
dma_inv_range(virt_addr, size);
return 0;
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index ad96bda702b8..9e8d36d94944 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -289,7 +289,7 @@ static void flush_cacheable_pages(void *start, size_t size)
static unsigned long get_pte_attrs(maptype_t map_type)
{
- switch (map_type) {
+ switch (map_type & MAP_TYPE_MASK) {
case MAP_CACHED:
return attrs_xn() | CACHED_MEM;
case MAP_UNCACHED:
@@ -330,7 +330,7 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
if (attrs == ~0UL)
return -EINVAL;
- if (map_type != MAP_CACHED)
+ if (!maptype_is_compatible(map_type, MAP_CACHED))
flush_cacheable_pages(virt_addr, size);
create_sections((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs, false);
diff --git a/arch/powerpc/cpu-85xx/mmu.c b/arch/powerpc/cpu-85xx/mmu.c
index 5fe9ba9db6d8..eec4d3e05b56 100644
--- a/arch/powerpc/cpu-85xx/mmu.c
+++ b/arch/powerpc/cpu-85xx/mmu.c
@@ -27,7 +27,7 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
if (phys_addr != virt_to_phys(virt_addr))
return -ENOSYS;
- switch (flags) {
+ switch (flags & MAP_TYPE_MASK) {
case MAP_UNCACHED:
pte_flags = MAS2_I;
break;
diff --git a/include/mmu.h b/include/mmu.h
index db8453f58521..29992ae1d6c6 100644
--- a/include/mmu.h
+++ b/include/mmu.h
@@ -16,6 +16,9 @@
#define MAP_WRITECOMBINE MAP_UNCACHED
#endif
+#define MAP_TYPE_MASK 0xFFFF
+#define MAP_ARCH(x) ((u16)~(x))
+
/*
* Depending on the architecture the default mapping can be
* cached or uncached. Without ARCH_HAS_REMAP being set this
@@ -25,11 +28,25 @@
#include <asm/mmu.h>
+static inline bool maptype_is_compatible(maptype_t active, maptype_t check)
+{
+ active &= MAP_TYPE_MASK;
+ check &= MAP_TYPE_MASK;
+
+ if (active == check)
+ return true;
+ if (active == MAP_CODE && check == MAP_CACHED)
+ return true;
+
+ return false;
+}
+
#ifndef ARCH_HAS_REMAP
static inline int arch_remap_range(void *virt_addr, phys_addr_t phys_addr,
size_t size, maptype_t map_type)
{
- if (map_type == MAP_ARCH_DEFAULT && phys_addr == virt_to_phys(virt_addr))
+ if (maptype_is_compatible(map_type, MAP_ARCH_DEFAULT) &&
+ phys_addr == virt_to_phys(virt_addr))
return 0;
return -EINVAL;
--
2.39.5
* [PATCH 03/22] ARM: mmu: prefix pre-MMU functions with early_
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 01/22] ARM: mmu: introduce new maptype_t type Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 02/22] ARM: mmu: compare only lowest 16 bits for map type Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 04/22] ARM: mmu: panic when alloc_pte fails Ahmad Fatoum
` (19 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
We already have early_remap_range for remapping operations done before
enabling the MMU. Let's use the same naming scheme for all early page
table operations.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_32.c | 10 +++++-----
arch/arm/cpu/mmu_64.c | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index ae86c27e7e27..848d2d2b8c0b 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -381,8 +381,8 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
return 0;
}
-static void create_sections(unsigned long first, unsigned long last,
- unsigned int flags)
+static void early_create_sections(unsigned long first, unsigned long last,
+ unsigned int flags)
{
uint32_t *ttb = get_ttb();
unsigned long ttb_start = pgd_index(first);
@@ -395,10 +395,10 @@ static void create_sections(unsigned long first, unsigned long last,
}
}
-static inline void create_flat_mapping(void)
+static inline void early_create_flat_mapping(void)
{
/* create a flat mapping using 1MiB sections */
- create_sections(0, 0xffffffff, attrs_uncached_mem());
+ early_create_sections(0, 0xffffffff, attrs_uncached_mem());
}
void *map_io_sections(unsigned long phys, void *_start, size_t size)
@@ -634,7 +634,7 @@ void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned lon
* This marks the whole address space as uncachable as well as
* unexecutable if possible
*/
- create_flat_mapping();
+ early_create_flat_mapping();
/* maps main memory as cachable */
optee_start = membase + memsize - OPTEE_SIZE;
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 9e8d36d94944..83738ed6ad0d 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -424,7 +424,7 @@ void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_ha
return dma_alloc_map(dev, size, dma_handle, MAP_WRITECOMBINE);
}
-static void init_range(size_t total_level0_tables)
+static void early_init_range(size_t total_level0_tables)
{
uint64_t *ttb = get_ttb();
uint64_t addr = 0;
@@ -460,7 +460,7 @@ void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned lon
* Assume maximum BITS_PER_PA set to 40 bits.
* Set 1:1 mapping of VA->PA. So to cover the full 1TB range we need 2 tables.
*/
- init_range(2);
+ early_init_range(2);
early_remap_range(membase, memsize, ARCH_MAP_CACHED_RWX, false);
--
2.39.5
* [PATCH 04/22] ARM: mmu: panic when alloc_pte fails
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (2 preceding siblings ...)
2025-08-06 12:36 ` [PATCH 03/22] ARM: mmu: prefix pre-MMU functions with early_ Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 05/22] ARM: mmu32: introduce new mmu_addr_t type Ahmad Fatoum
` (18 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
We don't check the return value of alloc_pte in the ARM32 PBL, but we do in
the ARM64 PBL as well as in barebox proper.
Let's unify the behavior by panicking in alloc_pte right away if we run out
of memory.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_32.c | 3 +--
arch/arm/cpu/mmu_64.c | 6 +-----
2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 848d2d2b8c0b..e631b741f107 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -83,8 +83,7 @@ static uint32_t *alloc_pte(void)
idx++;
- if (idx * PTE_SIZE >= ARM_EARLY_PAGETABLE_SIZE)
- return NULL;
+ BUG_ON(idx * PTE_SIZE >= ARM_EARLY_PAGETABLE_SIZE);
return get_ttb() + idx * PTE_SIZE;
}
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 83738ed6ad0d..e2cc1a5caabd 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -46,8 +46,7 @@ static uint64_t *alloc_pte(void)
idx++;
- if (idx * GRANULE_SIZE >= ARM_EARLY_PAGETABLE_SIZE)
- return NULL;
+ BUG_ON(idx * GRANULE_SIZE >= ARM_EARLY_PAGETABLE_SIZE);
return (void *)get_ttb() + idx * GRANULE_SIZE;
}
@@ -109,9 +108,6 @@ static void split_block(uint64_t *pte, int level)
levelshift = level2shift(level + 1);
new_table = alloc_pte();
- if (!new_table)
- panic("Unable to allocate PTE\n");
-
for (i = 0; i < MAX_PTE_ENTRIES; i++) {
set_pte(&new_table[i], old_pte | (i << levelshift));
--
2.39.5
* [PATCH 05/22] ARM: mmu32: introduce new mmu_addr_t type
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (3 preceding siblings ...)
2025-08-06 12:36 ` [PATCH 04/22] ARM: mmu: panic when alloc_pte fails Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 06/22] ARM: mmu: provide zero page control in PBL Ahmad Fatoum
` (17 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The ARM 32-bit and 64-bit MMU code makes heavy use of uint32_t and
uint64_t, respectively.
We can't use ulong for code shared between them: u32 and u64 are types
distinct from unsigned long (u64 is an unsigned long long), so pointers to
them cannot be implicitly converted. As we don't want to rewrite all the MMU
code to use ullong, let's just define a new mmu_addr_t, which can be used in
code that should be callable from either ISA.
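A minimal sketch of the intended use (walk_table() is a made-up name used only
for illustration; the typedefs are the ones added below):

    /* in code shared between mmu_32.c and mmu_64.c */
    static void walk_table(mmu_addr_t *table);

    /* mmu_32.c: mmu_addr_t is u32, so a u32 * page table pointer matches */
    /* mmu_64.c: mmu_addr_t is u64, so a u64 * page table pointer matches */

    /* with "unsigned long *" instead, at least one of the two architectures
     * would need explicit casts for its page table pointers */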
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_32.h | 2 ++
arch/arm/cpu/mmu_64.h | 2 ++
2 files changed, 4 insertions(+)
diff --git a/arch/arm/cpu/mmu_32.h b/arch/arm/cpu/mmu_32.h
index 607d9e8608b2..7a58a819f08a 100644
--- a/arch/arm/cpu/mmu_32.h
+++ b/arch/arm/cpu/mmu_32.h
@@ -9,6 +9,8 @@
#include "mmu-common.h"
+typedef u32 mmu_addr_t;
+
#define PGDIR_SHIFT 20
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
diff --git a/arch/arm/cpu/mmu_64.h b/arch/arm/cpu/mmu_64.h
index d3c39dabb507..ad7c1bde5631 100644
--- a/arch/arm/cpu/mmu_64.h
+++ b/arch/arm/cpu/mmu_64.h
@@ -2,6 +2,8 @@
#include "mmu-common.h"
+typedef u64 mmu_addr_t;
+
#define CACHED_MEM (PTE_BLOCK_MEMTYPE(MT_NORMAL) | \
PTE_BLOCK_OUTER_SHARE | \
PTE_BLOCK_AF)
--
2.39.5
* [PATCH 06/22] ARM: mmu: provide zero page control in PBL
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (4 preceding siblings ...)
2025-08-06 12:36 ` [PATCH 05/22] ARM: mmu32: introduce new mmu_addr_t type Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 07/22] ARM: mmu: print map type as string Ahmad Fatoum
` (16 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
There's no reason to preclude zero page control in PBL if the MMU is set up,
and given that we want to add code to mmu-common.c that should also be usable
in PBL, let's remove this artificial limitation.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/Makefile | 2 +-
include/zero_page.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm/cpu/Makefile b/arch/arm/cpu/Makefile
index 955059279670..467ef17bfd55 100644
--- a/arch/arm/cpu/Makefile
+++ b/arch/arm/cpu/Makefile
@@ -4,7 +4,7 @@ obj-pbl-y += cpu.o
obj-$(CONFIG_ARM_EXCEPTIONS) += exceptions_$(S64_32).o interrupts_$(S64_32).o
pbl-$(CONFIG_ARM_EXCEPTIONS_PBL) += exceptions_$(S64_32).o interrupts_$(S64_32).o
-obj-$(CONFIG_MMU) += mmu-common.o
+obj-pbl-$(CONFIG_MMU) += mmu-common.o
obj-pbl-$(CONFIG_MMU) += mmu_$(S64_32).o
obj-$(CONFIG_MMU) += dma_$(S64_32).o
obj-pbl-y += lowlevel_$(S64_32).o
diff --git a/include/zero_page.h b/include/zero_page.h
index 067f39a7ee96..8dd66a1efb9b 100644
--- a/include/zero_page.h
+++ b/include/zero_page.h
@@ -4,7 +4,7 @@
#include <common.h>
-#if defined CONFIG_ARCH_HAS_ZERO_PAGE && defined CONFIG_MMU && IN_PROPER
+#if defined CONFIG_ARCH_HAS_ZERO_PAGE && defined CONFIG_MMU
/*
* zero_page_faulting - fault when accessing the zero page
--
2.39.5
* [PATCH 07/22] ARM: mmu: print map type as string
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (5 preceding siblings ...)
2025-08-06 12:36 ` [PATCH 06/22] ARM: mmu: provide zero page control in PBL Ahmad Fatoum
@ 2025-08-06 12:36 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 08/22] ARM: mmu64: rename create_sections to __arch_remap_range Ahmad Fatoum
` (15 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:36 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
To make early debugging of the MMU easier, let's add support for
printing the MMU mapping type as a string.
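With DEBUG enabled, the resulting output looks roughly like this (addresses
made up for illustration):

    __arch_remap_range: 0x40000000+0x100000 type RWX
    __arch_remap_range: 0xe0000000+0x1000 -> 0x2e000000 type UNCACHED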
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu-common.c | 14 ++++++++++++++
arch/arm/cpu/mmu-common.h | 16 ++++++++++++++++
arch/arm/cpu/mmu_32.c | 2 +-
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/arch/arm/cpu/mmu-common.c b/arch/arm/cpu/mmu-common.c
index 2e61dc36b7cc..1770c56b6eea 100644
--- a/arch/arm/cpu/mmu-common.c
+++ b/arch/arm/cpu/mmu-common.c
@@ -16,6 +16,20 @@
#include "mmu-common.h"
#include <efi/efi-mode.h>
+const char *map_type_tostr(maptype_t map_type)
+{
+ switch (map_type) {
+ case ARCH_MAP_CACHED_RWX: return "RWX";
+ case ARCH_MAP_CACHED_RO: return "RO";
+ case MAP_CACHED: return "CACHED";
+ case MAP_UNCACHED: return "UNCACHED";
+ case MAP_CODE: return "CODE";
+ case MAP_WRITECOMBINE: return "WRITECOMBINE";
+ case MAP_FAULT: return "FAULT";
+ default: return "<unknown>";
+ }
+}
+
void arch_sync_dma_for_cpu(void *vaddr, size_t size,
enum dma_data_direction dir)
{
diff --git a/arch/arm/cpu/mmu-common.h b/arch/arm/cpu/mmu-common.h
index e9005dfae766..01d081db426e 100644
--- a/arch/arm/cpu/mmu-common.h
+++ b/arch/arm/cpu/mmu-common.h
@@ -57,4 +57,20 @@ static inline size_t resource_count_pages(const struct resource *res)
return ALIGN(resource_size(res), SZ_4K);
}
+const char *map_type_tostr(maptype_t map_type);
+
+static inline void __pr_debug_remap(const char *func, ulong virt_addr, ulong phys_addr,
+ size_t size, maptype_t map_type)
+{
+ if (phys_addr == virt_addr)
+ pr_debug("%s: 0x%08lx+0x%zx type %s\n", func,
+ virt_addr, size, map_type_tostr(map_type));
+ else
+ pr_debug("%s: 0x%08lx+0x%zx -> 0x%08lx type %s\n", func,
+ virt_addr, size, phys_addr, map_type_tostr(map_type));
+}
+
+#define pr_debug_remap(virt_addr, phys_addr, size, map_type) \
+ __pr_debug_remap(__func__, virt_addr, phys_addr, size, map_type)
+
#endif
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index e631b741f107..4b7f370edaea 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -278,7 +278,7 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
pte_flags = get_pte_flags(map_type);
pmd_flags = pte_flags_to_pmd(pte_flags);
- pr_debug("%s: 0x%08x 0x%08x type %d\n", __func__, virt_addr, size, map_type);
+ pr_debug_remap(virt_addr, phys_addr, size, map_type);
size = PAGE_ALIGN(size);
if (!size)
--
2.39.5
* [PATCH 08/22] ARM: mmu64: rename create_sections to __arch_remap_range
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (6 preceding siblings ...)
2025-08-06 12:36 ` [PATCH 07/22] ARM: mmu: print map type as string Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 09/22] ARM: mmu: move get_pte_attrs call into __arch_remap_range Ahmad Fatoum
` (14 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The equivalent function for ARM32 is called __arch_remap_range and that
name would be more fitting for ARM64, as ARMv8 doesn't use the term
section and instead uses pages and blocks.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index e2cc1a5caabd..1c77aa9472df 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -123,8 +123,8 @@ static void split_block(uint64_t *pte, int level)
set_table(pte, new_table);
}
-static void create_sections(uint64_t virt, uint64_t phys, uint64_t size,
- uint64_t attr, bool force_pages)
+static void __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
+ uint64_t attr, bool force_pages)
{
uint64_t *ttb = get_ttb();
uint64_t block_size;
@@ -312,7 +312,7 @@ static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type, bo
if (WARN_ON(attrs == ~0UL))
return;
- create_sections(addr, addr, size, attrs, force_pages);
+ __arch_remap_range(addr, addr, size, attrs, force_pages);
}
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
@@ -329,7 +329,7 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
if (!maptype_is_compatible(map_type, MAP_CACHED))
flush_cacheable_pages(virt_addr, size);
- create_sections((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs, false);
+ __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs, false);
return 0;
}
--
2.39.5
* [PATCH 09/22] ARM: mmu: move get_pte_attrs call into __arch_remap_range
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (7 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 08/22] ARM: mmu64: rename create_sections to __arch_remap_range Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 10/22] ARM: mmu64: print debug message in __arch_remap_range Ahmad Fatoum
` (13 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
For similarity with the ARM32 code, give the function the same prototype
in both architectures. This will be useful in the follow-up commit when
we add a debug print that should reference the generic flags.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 73 +++++++++++++++++++------------------------
1 file changed, 32 insertions(+), 41 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 1c77aa9472df..bc5d1a6e8160 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -91,6 +91,28 @@ static __maybe_unused uint64_t *find_pte(uint64_t addr)
return __find_pte(get_ttb(), addr, NULL);
}
+static unsigned long get_pte_attrs(maptype_t map_type)
+{
+ switch (map_type & MAP_TYPE_MASK) {
+ case MAP_CACHED:
+ return attrs_xn() | CACHED_MEM;
+ case MAP_UNCACHED:
+ return attrs_xn() | UNCACHED_MEM;
+ case MAP_FAULT:
+ return 0x0;
+ case MAP_WRITECOMBINE:
+ return attrs_xn() | MEM_ALLOC_WRITECOMBINE;
+ case MAP_CODE:
+ return CACHED_MEM | PTE_BLOCK_RO;
+ case ARCH_MAP_CACHED_RO:
+ return attrs_xn() | CACHED_MEM | PTE_BLOCK_RO;
+ case ARCH_MAP_CACHED_RWX:
+ return CACHED_MEM;
+ default:
+ return ~0UL;
+ }
+}
+
#define MAX_PTE_ENTRIES 512
/* Splits a block PTE into table with subpages spanning the old block */
@@ -123,9 +145,10 @@ static void split_block(uint64_t *pte, int level)
set_table(pte, new_table);
}
-static void __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
- uint64_t attr, bool force_pages)
+static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
+ maptype_t map_type, bool force_pages)
{
+ unsigned long attr = get_pte_attrs(map_type);
uint64_t *ttb = get_ttb();
uint64_t block_size;
uint64_t block_shift;
@@ -138,11 +161,14 @@ static void __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
addr = virt;
+ if (WARN_ON(attr == ~0UL))
+ return -EINVAL;
+
attr &= ~PTE_TYPE_MASK;
size = PAGE_ALIGN(size);
if (!size)
- return;
+ return 0;
while (size) {
table = ttb;
@@ -178,6 +204,7 @@ static void __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
}
tlb_invalidate();
+ return 0;
}
static size_t granule_size(int level)
@@ -283,55 +310,19 @@ static void flush_cacheable_pages(void *start, size_t size)
v8_flush_dcache_range(flush_start, flush_end);
}
-static unsigned long get_pte_attrs(maptype_t map_type)
-{
- switch (map_type & MAP_TYPE_MASK) {
- case MAP_CACHED:
- return attrs_xn() | CACHED_MEM;
- case MAP_UNCACHED:
- return attrs_xn() | UNCACHED_MEM;
- case MAP_FAULT:
- return 0x0;
- case MAP_WRITECOMBINE:
- return attrs_xn() | MEM_ALLOC_WRITECOMBINE;
- case MAP_CODE:
- return CACHED_MEM | PTE_BLOCK_RO;
- case ARCH_MAP_CACHED_RO:
- return attrs_xn() | CACHED_MEM | PTE_BLOCK_RO;
- case ARCH_MAP_CACHED_RWX:
- return CACHED_MEM;
- default:
- return ~0UL;
- }
-}
-
static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type, bool force_pages)
{
- unsigned long attrs = get_pte_attrs(map_type);
-
- if (WARN_ON(attrs == ~0UL))
- return;
-
- __arch_remap_range(addr, addr, size, attrs, force_pages);
+ __arch_remap_range(addr, addr, size, map_type, force_pages);
}
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
{
- unsigned long attrs;
-
map_type = arm_mmu_maybe_skip_permissions(map_type);
- attrs = get_pte_attrs(map_type);
-
- if (attrs == ~0UL)
- return -EINVAL;
-
if (!maptype_is_compatible(map_type, MAP_CACHED))
flush_cacheable_pages(virt_addr, size);
- __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs, false);
-
- return 0;
+ return __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, map_type, false);
}
static void mmu_enable(void)
--
2.39.5
* [PATCH 10/22] ARM: mmu64: print debug message in __arch_remap_range
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (8 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 09/22] ARM: mmu: move get_pte_attrs call into __arch_remap_range Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 11/22] ARM: mmu: make force_pages a maptype_t flag Ahmad Fatoum
` (12 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
As already done for ARM32, let's print a helpful debug message for ARM64,
too.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index bc5d1a6e8160..0bd5e4dc98c4 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -164,6 +164,8 @@ static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
if (WARN_ON(attr == ~0UL))
return -EINVAL;
+ pr_debug_remap(addr, phys, size, map_type);
+
attr &= ~PTE_TYPE_MASK;
size = PAGE_ALIGN(size);
--
2.39.5
* [PATCH 11/22] ARM: mmu: make force_pages a maptype_t flag
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (9 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 10/22] ARM: mmu64: print debug message in __arch_remap_range Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 12/22] ARM: mmu64: move granule_size to the top of the file Ahmad Fatoum
` (11 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The case with force_pages == false is the default, and having to write an
extra parameter everywhere is needless visual clutter. Especially if we
are going to add new parameters or OR in further flags, it's more readable
to use a single parameter for the flags instead of several.
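The readability difference at one of the call sites converted below:

    /* before: a bare boolean whose meaning is not obvious at the call site */
    early_remap_range(barebox_start, barebox_size, ARCH_MAP_CACHED_RWX, true);

    /* after: the intent is spelled out by ORing a flag into the map type */
    early_remap_range(barebox_start, barebox_size,
		      ARCH_MAP_CACHED_RWX | ARCH_MAP_FLAG_PAGEWISE);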
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu-common.h | 3 +++
arch/arm/cpu/mmu_32.c | 18 ++++++++++--------
arch/arm/cpu/mmu_64.c | 21 +++++++++++----------
3 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/arch/arm/cpu/mmu-common.h b/arch/arm/cpu/mmu-common.h
index 01d081db426e..a111e15a21b4 100644
--- a/arch/arm/cpu/mmu-common.h
+++ b/arch/arm/cpu/mmu-common.h
@@ -9,10 +9,13 @@
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
+#include <linux/bits.h>
#define ARCH_MAP_CACHED_RWX MAP_ARCH(2)
#define ARCH_MAP_CACHED_RO MAP_ARCH(3)
+#define ARCH_MAP_FLAG_PAGEWISE BIT(31)
+
struct device;
void dma_inv_range(void *ptr, size_t size);
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 4b7f370edaea..e43d9d0d4606 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -266,8 +266,9 @@ static uint32_t get_pmd_flags(maptype_t map_type)
}
static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t size,
- maptype_t map_type, bool force_pages)
+ maptype_t map_type)
{
+ bool force_pages = map_type & ARCH_MAP_FLAG_PAGEWISE;
u32 virt_addr = (u32)_virt_addr;
u32 pte_flags, pmd_flags;
uint32_t *ttb = get_ttb();
@@ -363,16 +364,16 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
tlb_invalidate();
}
-static void early_remap_range(u32 addr, size_t size, maptype_t map_type, bool force_pages)
+static void early_remap_range(u32 addr, size_t size, maptype_t map_type)
{
- __arch_remap_range((void *)addr, addr, size, map_type, force_pages);
+ __arch_remap_range((void *)addr, addr, size, map_type);
}
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
{
map_type = arm_mmu_maybe_skip_permissions(map_type);
- __arch_remap_range(virt_addr, phys_addr, size, map_type, false);
+ __arch_remap_range(virt_addr, phys_addr, size, map_type);
if (maptype_is_compatible(map_type, MAP_UNCACHED))
dma_inv_range(virt_addr, size);
@@ -643,7 +644,7 @@ void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned lon
* map the bulk of the memory as sections to avoid allocating too many page tables
* at this early stage
*/
- early_remap_range(membase, barebox_start - membase, ARCH_MAP_CACHED_RWX, false);
+ early_remap_range(membase, barebox_start - membase, ARCH_MAP_CACHED_RWX);
/*
* Map the remainder of the memory explicitly with two level page tables. This is
* the place where barebox proper ends at. In barebox proper we'll remap the code
@@ -653,10 +654,11 @@ void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned lon
* a break-before-make sequence which we can't do when barebox proper is running
* at the location being remapped.
*/
- early_remap_range(barebox_start, barebox_size, ARCH_MAP_CACHED_RWX, true);
- early_remap_range(optee_start, OPTEE_SIZE, MAP_UNCACHED, false);
+ early_remap_range(barebox_start, barebox_size,
+ ARCH_MAP_CACHED_RWX | ARCH_MAP_FLAG_PAGEWISE);
+ early_remap_range(optee_start, OPTEE_SIZE, MAP_UNCACHED);
early_remap_range(PAGE_ALIGN_DOWN((uintptr_t)_stext), PAGE_ALIGN(_etext - _stext),
- ARCH_MAP_CACHED_RWX, false);
+ ARCH_MAP_CACHED_RWX);
__mmu_cache_on();
}
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 0bd5e4dc98c4..6e617a15a6d7 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -146,8 +146,9 @@ static void split_block(uint64_t *pte, int level)
}
static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
- maptype_t map_type, bool force_pages)
+ maptype_t map_type)
{
+ bool force_pages = map_type & ARCH_MAP_FLAG_PAGEWISE;
unsigned long attr = get_pte_attrs(map_type);
uint64_t *ttb = get_ttb();
uint64_t block_size;
@@ -312,9 +313,9 @@ static void flush_cacheable_pages(void *start, size_t size)
v8_flush_dcache_range(flush_start, flush_end);
}
-static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type, bool force_pages)
+static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type)
{
- __arch_remap_range(addr, addr, size, map_type, force_pages);
+ __arch_remap_range(addr, addr, size, map_type);
}
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
@@ -324,7 +325,7 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
if (!maptype_is_compatible(map_type, MAP_CACHED))
flush_cacheable_pages(virt_addr, size);
- return __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, map_type, false);
+ return __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, map_type);
}
static void mmu_enable(void)
@@ -419,7 +420,7 @@ static void early_init_range(size_t total_level0_tables)
uint64_t addr = 0;
while (total_level0_tables--) {
- early_remap_range(addr, L0_XLAT_SIZE, MAP_UNCACHED, false);
+ early_remap_range(addr, L0_XLAT_SIZE, MAP_UNCACHED);
split_block(ttb, 0);
addr += L0_XLAT_SIZE;
ttb++;
@@ -451,7 +452,7 @@ void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned lon
*/
early_init_range(2);
- early_remap_range(membase, memsize, ARCH_MAP_CACHED_RWX, false);
+ early_remap_range(membase, memsize, ARCH_MAP_CACHED_RWX);
if (optee_get_membase(&optee_membase)) {
optee_membase = membase + memsize - OPTEE_SIZE;
@@ -459,18 +460,18 @@ void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned lon
barebox_size = optee_membase - barebox_start;
early_remap_range(optee_membase - barebox_size, barebox_size,
- ARCH_MAP_CACHED_RWX, true);
+ ARCH_MAP_CACHED_RWX | ARCH_MAP_FLAG_PAGEWISE);
} else {
barebox_size = membase + memsize - barebox_start;
early_remap_range(membase + memsize - barebox_size, barebox_size,
- ARCH_MAP_CACHED_RWX, true);
+ ARCH_MAP_CACHED_RWX | ARCH_MAP_FLAG_PAGEWISE);
}
- early_remap_range(optee_membase, OPTEE_SIZE, MAP_FAULT, false);
+ early_remap_range(optee_membase, OPTEE_SIZE, MAP_FAULT);
early_remap_range(PAGE_ALIGN_DOWN((uintptr_t)_stext), PAGE_ALIGN(_etext - _stext),
- ARCH_MAP_CACHED_RWX, false);
+ ARCH_MAP_CACHED_RWX);
mmu_enable();
}
--
2.39.5
* [PATCH 12/22] ARM: mmu64: move granule_size to the top of the file
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (10 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 11/22] ARM: mmu: make force_pages a maptype_t flag Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 13/22] ARM: mmu64: fix benign off-by-one in flush_cacheable_pages Ahmad Fatoum
` (10 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The function and its comment are a useful aid for understanding the
code, so move them to the very top of the file.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 66 +++++++++++++++++++++----------------------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 6e617a15a6d7..9f709fc2d865 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -24,6 +24,39 @@
#include "mmu_64.h"
+static size_t granule_size(int level)
+{
+ /*
+ * With 4k page granule, a virtual address is split into 4 lookup parts
+ * spanning 9 bits each:
+ *
+ * _______________________________________________
+ * | | | | | | |
+ * | 0 | Lv0 | Lv1 | Lv2 | Lv3 | off |
+ * |_______|_______|_______|_______|_______|_______|
+ * 63-48 47-39 38-30 29-21 20-12 11-00
+ *
+ * mask page size
+ *
+ * Lv0: FF8000000000 --
+ * Lv1: 7FC0000000 1G
+ * Lv2: 3FE00000 2M
+ * Lv3: 1FF000 4K
+ * off: FFF
+ */
+ switch (level) {
+ default:
+ case 0:
+ return L0_XLAT_SIZE;
+ case 1:
+ return L1_XLAT_SIZE;
+ case 2:
+ return L2_XLAT_SIZE;
+ case 3:
+ return L3_XLAT_SIZE;
+ }
+}
+
static uint64_t *get_ttb(void)
{
return (uint64_t *)get_ttbr(current_el());
@@ -210,39 +243,6 @@ static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
return 0;
}
-static size_t granule_size(int level)
-{
- /*
- * With 4k page granule, a virtual address is split into 4 lookup parts
- * spanning 9 bits each:
- *
- * _______________________________________________
- * | | | | | | |
- * | 0 | Lv0 | Lv1 | Lv2 | Lv3 | off |
- * |_______|_______|_______|_______|_______|_______|
- * 63-48 47-39 38-30 29-21 20-12 11-00
- *
- * mask page size
- *
- * Lv0: FF8000000000 --
- * Lv1: 7FC0000000 1G
- * Lv2: 3FE00000 2M
- * Lv3: 1FF000 4K
- * off: FFF
- */
- switch (level) {
- default:
- case 0:
- return L0_XLAT_SIZE;
- case 1:
- return L1_XLAT_SIZE;
- case 2:
- return L2_XLAT_SIZE;
- case 3:
- return L3_XLAT_SIZE;
- }
-}
-
static bool pte_is_cacheable(uint64_t pte)
{
return (pte & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
--
2.39.5
* [PATCH 13/22] ARM: mmu64: fix benign off-by-one in flush_cacheable_pages
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (11 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 12/22] ARM: mmu64: move granule_size to the top of the file Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 14/22] ARM: mmu64: make flush_cacheable_pages less 64-bit dependent Ahmad Fatoum
` (9 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
For v8_flush_dcache_range(), the second argument is exclusive, but the
flush_end we pass can potentially be ~0ULL.
This is not a real problem, because the virtual address space can't span
the full 64 bits anyway, but in preparation for compiling the code for
32-bit as well, let's fix the off-by-one.
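A short illustration of why the inclusive end matters once these variables
become 32 bits wide (a sketch; the 32-bit flush helper follows in a later
patch):

    /* topmost page of a 32-bit address space */
    flush_start = 0xfffff000;
    flush_end   = 0xffffffff;	/* inclusive end: still representable */
    /* the old exclusive end would have been 0x100000000, which is not */

    dma_flush_range_end(flush_start, flush_end);	/* wraps v8_flush_dcache_range(start, end + 1) */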
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 9f709fc2d865..94b3137bde45 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -248,10 +248,24 @@ static bool pte_is_cacheable(uint64_t pte)
return (pte & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
}
+/**
+ * dma_flush_range_end - Flush caches for address range
+ * @start: Starting virtual address of the range.
+ * @end: Last virtual address in range (inclusive)
+ *
+ * This function cleans and invalidates all cache lines in the specified
+ * range. Note that end is inclusive, meaning that it's the last address
+ * that is flushed (assuming both start and total size are cache line aligned).
+ */
+static inline void dma_flush_range_end(unsigned long start, unsigned long end)
+{
+ v8_flush_dcache_range(start, end + 1);
+}
+
/**
* flush_cacheable_pages - Flush only the cacheable pages in a region
* @start: Starting virtual address of the range.
- * @end: Ending virtual address of the range.
+ * @size: Size of range
*
* This function walks the page table and flushes the data caches for the
* specified range only if the memory is marked as normal cacheable in the
@@ -266,7 +280,7 @@ static void flush_cacheable_pages(void *start, size_t size)
u64 *ttb;
region_start = PAGE_ALIGN_DOWN((ulong)start);
- region_end = PAGE_ALIGN(region_start + size);
+ region_end = PAGE_ALIGN(region_start + size) - 1;
ttb = get_ttb();
@@ -301,7 +315,7 @@ static void flush_cacheable_pages(void *start, size_t size)
* If we recorded any area before, let's flush it now
*/
if (flush_start != ~0ULL)
- v8_flush_dcache_range(flush_start, flush_end);
+ dma_flush_range_end(flush_start, flush_end);
/* and start the new contiguous flush area with this page */
flush_start = addr;
@@ -310,7 +324,7 @@ static void flush_cacheable_pages(void *start, size_t size)
/* The previous loop won't flush the last cached range, so do it here */
if (flush_start != ~0ULL)
- v8_flush_dcache_range(flush_start, flush_end);
+ dma_flush_range_end(flush_start, flush_end);
}
static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type)
--
2.39.5
* [PATCH 14/22] ARM: mmu64: make flush_cacheable_pages less 64-bit dependent
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (12 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 13/22] ARM: mmu64: fix benign off-by-one in flush_cacheable_pages Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 15/22] ARM: mmu64: allow asserting last level page in __find_pte Ahmad Fatoum
` (8 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
With just a few changes, flush_cacheable_pages() could be used for
32-bit ARM MMU support as well, so prepare for that by using generic
functions and types.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 94b3137bde45..cdfbf081d51f 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -274,10 +274,10 @@ static inline void dma_flush_range_end(unsigned long start, unsigned long end)
*/
static void flush_cacheable_pages(void *start, size_t size)
{
- u64 flush_start = ~0ULL, flush_end = ~0ULL;
- u64 region_start, region_end;
+ mmu_addr_t flush_start = ~0UL, flush_end = ~0UL;
+ mmu_addr_t region_start, region_end;
size_t block_size;
- u64 *ttb;
+ mmu_addr_t *ttb;
region_start = PAGE_ALIGN_DOWN((ulong)start);
region_end = PAGE_ALIGN(region_start + size) - 1;
@@ -292,9 +292,9 @@ static void flush_cacheable_pages(void *start, size_t size)
* windows being remapped being small, the overhead compared to
* actually flushing the ranges isn't too significant.
*/
- for (u64 addr = region_start; addr < region_end; addr += block_size) {
+ for (mmu_addr_t addr = region_start; addr < region_end; addr += block_size) {
int level;
- u64 *pte = __find_pte(ttb, addr, &level);
+ mmu_addr_t *pte = __find_pte(ttb, addr, &level);
block_size = granule_size(level);
@@ -314,7 +314,7 @@ static void flush_cacheable_pages(void *start, size_t size)
* We don't have a previous contiguous flush area to append to.
* If we recorded any area before, let's flush it now
*/
- if (flush_start != ~0ULL)
+ if (flush_start != ~0UL)
dma_flush_range_end(flush_start, flush_end);
/* and start the new contiguous flush area with this page */
@@ -323,7 +323,7 @@ static void flush_cacheable_pages(void *start, size_t size)
}
/* The previous loop won't flush the last cached range, so do it here */
- if (flush_start != ~0ULL)
+ if (flush_start != ~0UL)
dma_flush_range_end(flush_start, flush_end);
}
--
2.39.5
* [PATCH 15/22] ARM: mmu64: allow asserting last level page in __find_pte
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (13 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 14/22] ARM: mmu64: make flush_cacheable_pages less 64-bit dependent Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 16/22] ARM: mmu64: rename __find_pte to find_pte Ahmad Fatoum
` (7 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
We never call __find_pte with a NULL level argument.
The ARM32 code has a find_pte function that only returns PTEs for 4K pages
and returns NULL for PMDs, which callers later check to trigger a panic.
In preparation for providing the same API on both ARM32 and ARM64,
let's have the ARM64 version panic too if level is not specified, and
document the semantics of the function.
No functional change.
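The two calling modes after this patch, in short (ttb and addr as used
elsewhere in the file):

    uint64_t *pte;
    int level;

    pte = __find_pte(ttb, addr, &level);	/* walk may stop at any level; *level says where */
    pte = __find_pte(ttb, addr, NULL);		/* must reach a level-3 (4K) entry, else panic */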
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index cdfbf081d51f..15b809808153 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -95,6 +95,21 @@ static uint64_t *alloc_pte(void)
}
#endif
+/**
+ * __find_pte - Find page table entry
+ * @ttb: Translation Table Base
+ * @addr: Virtual address to lookup
+ * @level: used to store the level at which the page table walk ended.
+ * if NULL, asserts that the smallest page was found
+ *
+ * This function walks the page table from the top down and finds the page
+ * table entry associated with the supplied virtual address.
+ * The level at which a page was found is saved into *level.
+ * if the level is NULL, a last level page must be found or the function
+ * panics.
+ *
+ * Returns a pointer to the page table entry
+ */
static uint64_t *__find_pte(uint64_t *ttb, uint64_t addr, int *level)
{
uint64_t *pte = ttb;
@@ -113,6 +128,8 @@ static uint64_t *__find_pte(uint64_t *ttb, uint64_t addr, int *level)
pte = (uint64_t *)(*pte & XLAT_ADDR_MASK);
}
+ if (!level && i != 3)
+ panic("Got level %d page table entry, where level 3 expected\n", i);
if (level)
*level = i;
return pte;
--
2.39.5
* [PATCH 16/22] ARM: mmu64: rename __find_pte to find_pte
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (14 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 15/22] ARM: mmu64: allow asserting last level page in __find_pte Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 17/22] ARM: mmu32: rework find_pte to have ARM64 find_pte semantics Ahmad Fatoum
` (6 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
find_pte is useful for debugging, but it stayed unreferenced until we
added flush_cacheable_pages as a user of __find_pte.
__find_pte is easy enough to use directly, so just drop find_pte and rename
__find_pte to find_pte. This is in preparation for mmu32 providing a
function with the same semantics.
While at it, change the level to be unsigned, as there are no negative
levels.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 15b809808153..a20cb39a9296 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -96,7 +96,7 @@ static uint64_t *alloc_pte(void)
#endif
/**
- * __find_pte - Find page table entry
+ * find_pte - Find page table entry
* @ttb: Translation Table Base
* @addr: Virtual address to lookup
* @level: used to store the level at which the page table walk ended.
@@ -110,7 +110,7 @@ static uint64_t *alloc_pte(void)
*
* Returns a pointer to the page table entry
*/
-static uint64_t *__find_pte(uint64_t *ttb, uint64_t addr, int *level)
+static uint64_t *find_pte(uint64_t *ttb, uint64_t addr, unsigned *level)
{
uint64_t *pte = ttb;
uint64_t block_shift;
@@ -135,12 +135,6 @@ static uint64_t *__find_pte(uint64_t *ttb, uint64_t addr, int *level)
return pte;
}
-/* This is currently unused, but useful for debugging */
-static __maybe_unused uint64_t *find_pte(uint64_t addr)
-{
- return __find_pte(get_ttb(), addr, NULL);
-}
-
static unsigned long get_pte_attrs(maptype_t map_type)
{
switch (map_type & MAP_TYPE_MASK) {
@@ -310,8 +304,8 @@ static void flush_cacheable_pages(void *start, size_t size)
* actually flushing the ranges isn't too significant.
*/
for (mmu_addr_t addr = region_start; addr < region_end; addr += block_size) {
- int level;
- mmu_addr_t *pte = __find_pte(ttb, addr, &level);
+ unsigned level;
+ mmu_addr_t *pte = find_pte(ttb, addr, &level);
block_size = granule_size(level);
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 17/22] ARM: mmu32: rework find_pte to have ARM64 find_pte semantics
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (15 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 16/22] ARM: mmu64: rename __find_pte to find_pte Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 18/22] ARM: mmu64: factor out flush_cacheable_pages for reusability Ahmad Fatoum
` (5 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The ARM32 find_pte can only look up 4K page entries and returns NULL
when it encounters a section. The ARM64 find_pte, on the other hand,
returns the PTE at whatever level the walk ends and stores that level
into a pointer supplied by the caller.
Let's have the ARM32 version behave like the ARM64 one to allow code
reuse between them and to simplify maintenance.
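As an illustration of the new semantics (a sketch, not code added by
this patch; ttb and virt_addr are placeholders):

	unsigned level;
	u32 *pte = find_pte(ttb, virt_addr, &level);

	if (level == 1) {
		/* *pte is a 1 MiB section descriptor in the first-level table */
	} else {
		/* level == 2: *pte is a 4K small page descriptor */
	}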
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_32.c | 48 ++++++++++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index e43d9d0d4606..521e5f3a5769 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -94,16 +94,38 @@ static uint32_t *alloc_pte(void)
}
#endif
-static u32 *find_pte(unsigned long adr)
+/**
+ * find_pte - Find page table entry
+ * @ttb: Translation Table Base
+ * @addr: Virtual address to lookup
+ * @level: used to store the level at which the page table walk ended.
+ * If NULL, asserts that the smallest page was found
+ *
+ * This function walks the page table from the top down and finds the page
+ * table entry associated with the supplied virtual address.
+ * The level at which a page was found is saved into *level.
+ * If @level is NULL, a last-level page must be found or the function
+ * panics.
+ *
+ * Returns a pointer to the page table entry
+ */
+static u32 *find_pte(uint32_t *ttb, uint32_t adr, unsigned *level)
{
+ u32 *pgd = &ttb[pgd_index(adr)];
u32 *table;
- uint32_t *ttb = get_ttb();
- if (!pgd_type_table(ttb[pgd_index(adr)]))
- return NULL;
+ if (!pgd_type_table(*pgd)) {
+ if (!level)
+ panic("Got level 1 page table entry, where level 2 expected\n");
+ *level = 1;
+ return pgd;
+ }
+
+ if (level)
+ *level = 2;
/* find the coarse page table base address */
- table = (u32 *)(ttb[pgd_index(adr)] & ~0x3ff);
+ table = (u32 *)(*pgd & ~0x3ff);
/* find second level descriptor */
return &table[(adr >> PAGE_SHIFT) & 0xff];
@@ -308,7 +330,7 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
} else {
unsigned int num_ptes;
u32 *table = NULL;
- unsigned int i;
+ unsigned int i, level;
u32 *pte;
/*
* We only want to cover pages up until next
@@ -328,17 +350,15 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
chunk = min(chunk, size);
num_ptes = chunk / PAGE_SIZE;
- pte = find_pte(virt_addr);
- if (!pte) {
+ pte = find_pte(ttb, virt_addr, &level);
+ if (level == 1) {
/*
- * If PTE is not found it means that
- * we needs to split this section and
- * create a new page table for it
+ * No PTE at level 2, so we need to split this section
+ * and create a new page table for it
*/
table = arm_create_pte(virt_addr, phys_addr,
pmd_flags_to_pte(*pgd));
- pte = find_pte(virt_addr);
- BUG_ON(!pte);
+ pte = find_pte(ttb, virt_addr, NULL);
}
for (i = 0; i < num_ptes; i++) {
@@ -452,7 +472,7 @@ static void create_vector_table(unsigned long adr)
pr_debug("Creating vector table, virt = 0x%p, phys = 0x%08lx\n",
vectors, adr);
arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED));
- pte = find_pte(adr);
+ pte = find_pte(get_ttb(), adr, NULL);
// TODO break-before-make missing
set_pte(pte, (u32)vectors | PTE_TYPE_SMALL |
get_pte_flags(MAP_CACHED));
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 18/22] ARM: mmu64: factor out flush_cacheable_pages for reusability
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (16 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 17/22] ARM: mmu32: rework find_pte to have ARM64 find_pte semantics Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 19/22] ARM: mmu32: flush only cacheable pages on remap Ahmad Fatoum
` (4 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
In preparation for sharing the same code with ARM32, let's move
flush_cacheable_pages into a header. We intentionally don't move the
code into mmu-common.c, as we want to give the compiler maximum leeway
to inline the page table walk.
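The header expects every includer to define a few helpers beforehand;
roughly the following contract, with mmu_addr_t standing in for the
per-architecture descriptor type (sketch only, not part of this patch;
pte_is_cacheable gains a level argument in a follow-up patch):

	/* in mmu_32.c / mmu_64.c, defined before the #include */
	static mmu_addr_t *get_ttb(void);
	static size_t granule_size(int level);
	static mmu_addr_t *find_pte(mmu_addr_t *ttb, mmu_addr_t addr,
				    unsigned *level);
	static bool pte_is_cacheable(mmu_addr_t pte);
	static void dma_flush_range_end(unsigned long start, unsigned long end);

	#include "flush_cacheable_pages.h"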
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/flush_cacheable_pages.h | 77 ++++++++++++++++++++++++++++
arch/arm/cpu/mmu_64.c | 65 +----------------------
2 files changed, 78 insertions(+), 64 deletions(-)
create mode 100644 arch/arm/cpu/flush_cacheable_pages.h
diff --git a/arch/arm/cpu/flush_cacheable_pages.h b/arch/arm/cpu/flush_cacheable_pages.h
new file mode 100644
index 000000000000..85fde0122802
--- /dev/null
+++ b/arch/arm/cpu/flush_cacheable_pages.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-FileCopyrightText: 2024 Ahmad Fatoum, Pengutronix */
+
+/**
+ * flush_cacheable_pages - Flush only the cacheable pages in a region
+ * @start: Starting virtual address of the range.
+ * @size: Size of range
+ *
+ * This function walks the page table and flushes the data caches for the
+ * specified range only if the memory is marked as normal cacheable in the
+ * page tables. If a non-cacheable or non-normal page is encountered,
+ * it's skipped.
+ */
+/**
+ * flush_cacheable_pages - Flush only the cacheable pages in a region
+ * @start: Starting virtual address of the range.
+ * @size: Size of range
+ *
+ * This function walks the page table and flushes the data caches for the
+ * specified range only if the memory is marked as normal cacheable in the
+ * page tables. If a non-cacheable or non-normal page is encountered,
+ * it's skipped.
+ */
+static void flush_cacheable_pages(void *start, size_t size)
+{
+ mmu_addr_t flush_start = ~0UL, flush_end = ~0UL;
+ mmu_addr_t region_start, region_end;
+ size_t block_size;
+ mmu_addr_t *ttb;
+
+ region_start = PAGE_ALIGN_DOWN((ulong)start);
+ region_end = PAGE_ALIGN(region_start + size) - 1;
+
+ ttb = get_ttb();
+
+ /*
+ * TODO: This loop could be made more optimal by inlining the page walk,
+ * so we need not restart address translation from the top every time.
+ *
+ * The hope is that with the page tables being cached and the
+ * windows being remapped being small, the overhead compared to
+ * actually flushing the ranges isn't too significant.
+ */
+ for (mmu_addr_t addr = region_start; addr < region_end; addr += block_size) {
+ unsigned level;
+ mmu_addr_t *pte = find_pte(ttb, addr, &level);
+
+ block_size = granule_size(level);
+
+ if (!pte || !pte_is_cacheable(*pte))
+ continue;
+
+ if (flush_end == addr) {
+ /*
+ * While it's safe to flush the whole block_size,
+ * it's unnecessary time waste to go beyond region_end.
+ */
+ flush_end = min(flush_end + block_size, region_end);
+ continue;
+ }
+
+ /*
+ * We don't have a previous contiguous flush area to append to.
+ * If we recorded any area before, let's flush it now
+ */
+ if (flush_start != ~0UL)
+ dma_flush_range_end(flush_start, flush_end);
+
+ /* and start the new contiguous flush area with this page */
+ flush_start = addr;
+ flush_end = min(flush_start + block_size, region_end);
+ }
+
+ /* The previous loop won't flush the last cached range, so do it here */
+ if (flush_start != ~0UL)
+ dma_flush_range_end(flush_start, flush_end);
+}
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index a20cb39a9296..50bb25b5373a 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -273,70 +273,7 @@ static inline void dma_flush_range_end(unsigned long start, unsigned long end)
v8_flush_dcache_range(start, end + 1);
}
-/**
- * flush_cacheable_pages - Flush only the cacheable pages in a region
- * @start: Starting virtual address of the range.
- * @size: Size of range
- *
- * This function walks the page table and flushes the data caches for the
- * specified range only if the memory is marked as normal cacheable in the
- * page tables. If a non-cacheable or non-normal page is encountered,
- * it's skipped.
- */
-static void flush_cacheable_pages(void *start, size_t size)
-{
- mmu_addr_t flush_start = ~0UL, flush_end = ~0UL;
- mmu_addr_t region_start, region_end;
- size_t block_size;
- mmu_addr_t *ttb;
-
- region_start = PAGE_ALIGN_DOWN((ulong)start);
- region_end = PAGE_ALIGN(region_start + size) - 1;
-
- ttb = get_ttb();
-
- /*
- * TODO: This loop could be made more optimal by inlining the page walk,
- * so we need not restart address translation from the top every time.
- *
- * The hope is that with the page tables being cached and the
- * windows being remapped being small, the overhead compared to
- * actually flushing the ranges isn't too significant.
- */
- for (mmu_addr_t addr = region_start; addr < region_end; addr += block_size) {
- unsigned level;
- mmu_addr_t *pte = find_pte(ttb, addr, &level);
-
- block_size = granule_size(level);
-
- if (!pte || !pte_is_cacheable(*pte))
- continue;
-
- if (flush_end == addr) {
- /*
- * While it's safe to flush the whole block_size,
- * it's unnecessary time waste to go beyond region_end.
- */
- flush_end = min(flush_end + block_size, region_end);
- continue;
- }
-
- /*
- * We don't have a previous contiguous flush area to append to.
- * If we recorded any area before, let's flush it now
- */
- if (flush_start != ~0UL)
- dma_flush_range_end(flush_start, flush_end);
-
- /* and start the new contiguous flush area with this page */
- flush_start = addr;
- flush_end = min(flush_start + block_size, region_end);
- }
-
- /* The previous loop won't flush the last cached range, so do it here */
- if (flush_start != ~0UL)
- dma_flush_range_end(flush_start, flush_end);
-}
+#include "flush_cacheable_pages.h"
static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type)
{
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 19/22] ARM: mmu32: flush only cacheable pages on remap
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (17 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 18/22] ARM: mmu64: factor out flush_cacheable_pages for reusability Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 20/22] ARM: mmu32: factor out set_pte_range helper Ahmad Fatoum
` (3 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
For the same reasons described in b8454cae3b1e ("ARM64: mmu: flush
cacheable regions prior to remapping"), we want to stop doing cache
maintenance operations on MMIO regions and only flush regions that were
actually mapped cacheable before the remap.
ARM64 already does this, and the code has been prepared for reuse by
moving it into a dedicated header, so let's define the helpers it
expects and put it to use.
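In effect, a remap like the following no longer performs any cache
maintenance on parts of the window that were never mapped cacheable
(illustrative call only; the address and size are placeholders):

	/* before: dma_inv_range() over the whole range after remapping,
	 * regardless of the previous attributes.
	 * after: only previously MAP_CACHED pages are written back, and
	 * that happens before the attributes are changed. */
	remap_range(IOMEM(0x40000000), SZ_1M, MAP_UNCACHED);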
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/flush_cacheable_pages.h | 2 +-
arch/arm/cpu/mmu_32.c | 58 ++++++++++++++++++++++++++--
arch/arm/cpu/mmu_64.c | 2 +-
3 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/arch/arm/cpu/flush_cacheable_pages.h b/arch/arm/cpu/flush_cacheable_pages.h
index 85fde0122802..a03e10810dc7 100644
--- a/arch/arm/cpu/flush_cacheable_pages.h
+++ b/arch/arm/cpu/flush_cacheable_pages.h
@@ -47,7 +47,7 @@ static void flush_cacheable_pages(void *start, size_t size)
block_size = granule_size(level);
- if (!pte || !pte_is_cacheable(*pte))
+ if (!pte || !pte_is_cacheable(*pte, level))
continue;
if (flush_end == addr) {
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 521e5f3a5769..a76d403e3477 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -24,6 +24,36 @@
#define PTRS_PER_PTE (PGDIR_SIZE / PAGE_SIZE)
+static size_t granule_size(int level)
+{
+ /*
+ * With 4k page granule, a virtual address is split into 2 lookup parts.
+ * We don't do LPAE or large (64K) pages for ARM32.
+ *
+ * _______________________
+ * | | | |
+ * | Lv1 | Lv2 | off |
+ * |_______|_______|_______|
+ * 31-21 20-12 11-00
+ *
+ * mask page size term
+ *
+ * Lv0: E0000000 --
+ * Lv1: 1FE00000 1M PGD/PMD
+ * Lv2: 1FF000 4K PTE
+ * off: FFF
+ */
+
+ switch (level) {
+ case 1:
+ return PGDIR_SIZE;
+ case 2:
+ return PAGE_SIZE;
+ }
+
+ return 0;
+}
+
static inline uint32_t *get_ttb(void)
{
/* Clear unpredictable bits [13:0] */
@@ -142,6 +172,20 @@ void dma_flush_range(void *ptr, size_t size)
outer_cache.flush_range(start, end);
}
+/**
+ * dma_flush_range_end - Flush caches for address range
+ * @start: Starting virtual address of the range.
+ * @end: Last virtual address in range (inclusive)
+ *
+ * This function cleans and invalidates all cache lines in the specified
+ * range. Note that end is inclusive, meaning that it's the last address
+ * that is flushed (assuming both start and total size are cache line aligned).
+ */
+static void dma_flush_range_end(unsigned long start, unsigned long end)
+{
+ dma_flush_range((void *)start, end - start + 1);
+}
+
void dma_inv_range(void *ptr, size_t size)
{
unsigned long start = (unsigned long)ptr;
@@ -389,15 +433,23 @@ static void early_remap_range(u32 addr, size_t size, maptype_t map_type)
__arch_remap_range((void *)addr, addr, size, map_type);
}
+static bool pte_is_cacheable(uint32_t pte, int level)
+{
+ return (level == 2 && (pte & PTE_CACHEABLE)) ||
+ (level == 1 && (pte & PMD_SECT_CACHEABLE));
+}
+
+#include "flush_cacheable_pages.h"
+
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
{
+ if (!maptype_is_compatible(map_type, MAP_CACHED))
+ flush_cacheable_pages(virt_addr, size);
+
map_type = arm_mmu_maybe_skip_permissions(map_type);
__arch_remap_range(virt_addr, phys_addr, size, map_type);
- if (maptype_is_compatible(map_type, MAP_UNCACHED))
- dma_inv_range(virt_addr, size);
-
return 0;
}
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 50bb25b5373a..d8ba7a171c2d 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -254,7 +254,7 @@ static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
return 0;
}
-static bool pte_is_cacheable(uint64_t pte)
+static bool pte_is_cacheable(uint64_t pte, int level)
{
return (pte & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
}
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 20/22] ARM: mmu32: factor out set_pte_range helper
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (18 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 19/22] ARM: mmu32: flush only cacheable pages on remap Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 21/22] ARM: mmu64: " Ahmad Fatoum
` (2 subsequent siblings)
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
By adding a helper that sets multiple PTEs at once, we can centralize
the break-before-make handling in a single place.
No functional change.
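For reference, the two shapes of calls this enables (taken from the
hunks below; bbm says whether the entries may be live and thus whether
break-before-make handling will eventually be required):

	/* fill a second-level table with 4K small-page entries */
	set_pte_range(2, table, phys, PTRS_PER_PTE, PTE_TYPE_SMALL | flags, bbm);

	/* point a single first-level entry at the new table */
	set_pte_range(1, &ttb[ttb_idx], (unsigned long)table, 1, PMD_TYPE_TABLE, bbm);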
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_32.c | 83 ++++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 41 deletions(-)
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index a76d403e3477..7cf04ea9412a 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -106,6 +106,23 @@ static void set_pte(uint32_t *pt, uint32_t val)
WRITE_ONCE(*pt, val);
}
+static void set_pte_range(unsigned level, uint32_t *virt, phys_addr_t phys,
+ size_t count, uint32_t attrs, bool bbm)
+{
+ unsigned granularity = granule_size(level);
+
+ if (!bbm)
+ goto write_attrs;
+
+ // TODO break-before-make missing
+
+write_attrs:
+ for (int i = 0; i < count; i++, phys += granularity)
+ set_pte(&virt[i], phys | attrs);
+
+ dma_flush_range(virt, count * sizeof(*virt));
+}
+
#ifdef __PBL__
static uint32_t *alloc_pte(void)
{
@@ -203,11 +220,11 @@ void dma_inv_range(void *ptr, size_t size)
* Not yet exported, but may be later if someone finds use for it.
*/
static u32 *arm_create_pte(unsigned long virt, unsigned long phys,
- uint32_t flags)
+ uint32_t flags, bool bbm)
{
uint32_t *ttb = get_ttb();
u32 *table;
- int i, ttb_idx;
+ int ttb_idx;
virt = ALIGN_DOWN(virt, PGDIR_SIZE);
phys = ALIGN_DOWN(phys, PGDIR_SIZE);
@@ -216,16 +233,9 @@ static u32 *arm_create_pte(unsigned long virt, unsigned long phys,
ttb_idx = pgd_index(virt);
- for (i = 0; i < PTRS_PER_PTE; i++) {
- set_pte(&table[i], phys | PTE_TYPE_SMALL | flags);
- virt += PAGE_SIZE;
- phys += PAGE_SIZE;
- }
- dma_flush_range(table, PTRS_PER_PTE * sizeof(u32));
+ set_pte_range(2, table, phys, PTRS_PER_PTE, PTE_TYPE_SMALL | flags, bbm);
- // TODO break-before-make missing
- set_pte(&ttb[ttb_idx], (unsigned long)table | PMD_TYPE_TABLE);
- dma_flush_range(&ttb[ttb_idx], sizeof(u32));
+ set_pte_range(1, &ttb[ttb_idx], (unsigned long)table, 1, PMD_TYPE_TABLE, bbm);
return table;
}
@@ -335,6 +345,7 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
maptype_t map_type)
{
bool force_pages = map_type & ARCH_MAP_FLAG_PAGEWISE;
+ bool mmu_on;
u32 virt_addr = (u32)_virt_addr;
u32 pte_flags, pmd_flags;
uint32_t *ttb = get_ttb();
@@ -351,30 +362,30 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
if (!size)
return;
+ mmu_on = get_cr() & CR_M;
+
while (size) {
const bool pgdir_size_aligned = IS_ALIGNED(virt_addr, PGDIR_SIZE);
u32 *pgd = (u32 *)&ttb[pgd_index(virt_addr)];
+ u32 flags;
size_t chunk;
if (size >= PGDIR_SIZE && pgdir_size_aligned &&
IS_ALIGNED(phys_addr, PGDIR_SIZE) &&
!pgd_type_table(*pgd) && !force_pages) {
- u32 val;
/*
* TODO: Add code to discard a page table and
* replace it with a section
*/
chunk = PGDIR_SIZE;
- val = phys_addr | pmd_flags;
+ flags = pmd_flags;
if (!maptype_is_compatible(map_type, MAP_FAULT))
- val |= PMD_TYPE_SECT;
- // TODO break-before-make missing
- set_pte(pgd, val);
- dma_flush_range(pgd, sizeof(*pgd));
+ flags |= PMD_TYPE_SECT;
+ set_pte_range(1, pgd, phys_addr, 1, flags, mmu_on);
} else {
unsigned int num_ptes;
u32 *table = NULL;
- unsigned int i, level;
+ unsigned int level;
u32 *pte;
/*
* We only want to cover pages up until next
@@ -401,23 +412,14 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
* and create a new page table for it
*/
table = arm_create_pte(virt_addr, phys_addr,
- pmd_flags_to_pte(*pgd));
+ pmd_flags_to_pte(*pgd), mmu_on);
pte = find_pte(ttb, virt_addr, NULL);
}
- for (i = 0; i < num_ptes; i++) {
- u32 val;
-
- val = phys_addr + i * PAGE_SIZE;
- val |= pte_flags;
- if (!maptype_is_compatible(map_type, MAP_FAULT))
- val |= PTE_TYPE_SMALL;
-
- // TODO break-before-make missing
- set_pte(&pte[i], val);
- }
-
- dma_flush_range(pte, num_ptes * sizeof(u32));
+ flags = pte_flags;
+ if (!maptype_is_compatible(map_type, MAP_FAULT))
+ flags |= PTE_TYPE_SMALL;
+ set_pte_range(2, pte, phys_addr, num_ptes, flags, mmu_on);
}
virt_addr += chunk;
@@ -461,6 +463,7 @@ static void early_create_sections(unsigned long first, unsigned long last,
unsigned long ttb_end = pgd_index(last) + 1;
unsigned int i, addr = first;
+ /* This always runs with MMU disabled, so just opencode the loop */
for (i = ttb_start; i < ttb_end; i++) {
set_pte(&ttb[i], addr | flags);
addr += PGDIR_SIZE;
@@ -475,13 +478,11 @@ static inline void early_create_flat_mapping(void)
void *map_io_sections(unsigned long phys, void *_start, size_t size)
{
- unsigned long start = (unsigned long)_start, sec;
+ unsigned long start = (unsigned long)_start;
uint32_t *ttb = get_ttb();
- for (sec = start; sec < start + size; sec += PGDIR_SIZE, phys += PGDIR_SIZE) {
- // TODO break-before-make missing
- set_pte(&ttb[pgd_index(sec)], phys | get_pmd_flags(MAP_UNCACHED));
- }
+ set_pte_range(1, &ttb[pgd_index(start)], phys, size / PGDIR_SIZE,
+ get_pmd_flags(MAP_UNCACHED), true);
dma_flush_range(ttb, 0x4000);
tlb_invalidate();
@@ -523,11 +524,11 @@ static void create_vector_table(unsigned long adr)
vectors = xmemalign(PAGE_SIZE, PAGE_SIZE);
pr_debug("Creating vector table, virt = 0x%p, phys = 0x%08lx\n",
vectors, adr);
- arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED));
+
+ arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED), true);
pte = find_pte(get_ttb(), adr, NULL);
- // TODO break-before-make missing
- set_pte(pte, (u32)vectors | PTE_TYPE_SMALL |
- get_pte_flags(MAP_CACHED));
+ set_pte_range(2, pte, (u32)vectors, 1, PTE_TYPE_SMALL |
+ get_pte_flags(MAP_CACHED), true);
}
arm_fixup_vectors();
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 21/22] ARM: mmu64: factor out set_pte_range helper
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (19 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 20/22] ARM: mmu32: factor out set_pte_range helper Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 22/22] ARM: mmu: define dma_alloc_writecombine in common code Ahmad Fatoum
2025-08-07 7:24 ` [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Sascha Hauer
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
By adding a helper that sets multiple PTEs at once, we can centralize
the break-before-make handling in a single place.
No functional change.
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu_64.c | 52 ++++++++++++++++++++++---------------------
1 file changed, 27 insertions(+), 25 deletions(-)
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index d8ba7a171c2d..edb1b34aca7f 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -67,9 +67,20 @@ static void set_pte(uint64_t *pt, uint64_t val)
WRITE_ONCE(*pt, val);
}
-static void set_table(uint64_t *pt, uint64_t *table_addr)
+static void set_pte_range(unsigned level, uint64_t *virt, phys_addr_t phys,
+ size_t count, uint64_t attrs, bool bbm)
{
- set_pte(pt, PTE_TYPE_TABLE | (uint64_t)table_addr);
+ unsigned granularity = granule_size(level);
+ if (!bbm)
+ goto write_attrs;
+
+ // TODO break-before-make missing
+
+write_attrs:
+ for (int i = 0; i < count; i++, phys += granularity)
+ set_pte(&virt[i], phys | attrs);
+
+ dma_flush_range(virt, count * sizeof(*virt));
}
#ifdef __PBL__
@@ -160,37 +171,29 @@ static unsigned long get_pte_attrs(maptype_t map_type)
#define MAX_PTE_ENTRIES 512
/* Splits a block PTE into table with subpages spanning the old block */
-static void split_block(uint64_t *pte, int level)
+static void split_block(uint64_t *pte, int level, bool bbm)
{
uint64_t old_pte = *pte;
uint64_t *new_table;
- uint64_t i = 0;
- int levelshift;
+ u64 flags = 0;
if ((*pte & PTE_TYPE_MASK) == PTE_TYPE_TABLE)
return;
- /* level describes the parent level, we need the child ones */
- levelshift = level2shift(level + 1);
-
new_table = alloc_pte();
- for (i = 0; i < MAX_PTE_ENTRIES; i++) {
- set_pte(&new_table[i], old_pte | (i << levelshift));
+ /* Level 3 block PTEs have the table type */
+ if ((level + 1) == 3)
+ flags |= PTE_TYPE_TABLE;
- /* Level 3 block PTEs have the table type */
- if ((level + 1) == 3)
- new_table[i] |= PTE_TYPE_TABLE;
- }
+ set_pte_range(level + 1, new_table, old_pte, MAX_PTE_ENTRIES, flags, bbm);
- /* Set the new table into effect
- * TODO: break-before-make missing
- */
- set_table(pte, new_table);
+ /* level describes the parent level, we need the child ones */
+ set_pte_range(level, pte, (uint64_t)new_table, 1, PTE_TYPE_TABLE, bbm);
}
static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
- maptype_t map_type)
+ maptype_t map_type, bool bbm)
{
bool force_pages = map_type & ARCH_MAP_FLAG_PAGEWISE;
unsigned long attr = get_pte_attrs(map_type);
@@ -235,14 +238,13 @@ static int __arch_remap_range(uint64_t virt, uint64_t phys, uint64_t size,
type = (level == 3) ?
PTE_TYPE_PAGE : PTE_TYPE_BLOCK;
- /* TODO: break-before-make missing */
- set_pte(pte, phys | attr | type);
+ set_pte_range(level, pte, phys, 1, attr | type, bbm);
addr += block_size;
phys += block_size;
size -= block_size;
break;
} else {
- split_block(pte, level);
+ split_block(pte, level, bbm);
}
table = get_level_table(pte);
@@ -277,7 +279,7 @@ static inline void dma_flush_range_end(unsigned long start, unsigned long end)
static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type)
{
- __arch_remap_range(addr, addr, size, map_type);
+ __arch_remap_range(addr, addr, size, map_type, false);
}
int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptype_t map_type)
@@ -287,7 +289,7 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, maptyp
if (!maptype_is_compatible(map_type, MAP_CACHED))
flush_cacheable_pages(virt_addr, size);
- return __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, map_type);
+ return __arch_remap_range((uint64_t)virt_addr, phys_addr, (uint64_t)size, map_type, true);
}
static void mmu_enable(void)
@@ -383,7 +385,7 @@ static void early_init_range(size_t total_level0_tables)
while (total_level0_tables--) {
early_remap_range(addr, L0_XLAT_SIZE, MAP_UNCACHED);
- split_block(ttb, 0);
+ split_block(ttb, 0, false);
addr += L0_XLAT_SIZE;
ttb++;
}
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 22/22] ARM: mmu: define dma_alloc_writecombine in common code
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (20 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 21/22] ARM: mmu64: " Ahmad Fatoum
@ 2025-08-06 12:37 ` Ahmad Fatoum
2025-08-07 7:24 ` [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Sascha Hauer
22 siblings, 0 replies; 24+ messages in thread
From: Ahmad Fatoum @ 2025-08-06 12:37 UTC (permalink / raw)
To: barebox; +Cc: Ahmad Fatoum
The two implementations are equivalent, as the ARM32/ARM64-specific
bits are handled in remap_range.
Combine them in common code to remove the duplication.
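For reference, a minimal usage sketch of the now-shared helper (dev and
the size are placeholders):

	dma_addr_t dma_handle;
	void *buf = dma_alloc_writecombine(dev, SZ_4K, &dma_handle);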
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
arch/arm/cpu/mmu-common.c | 5 +++++
arch/arm/cpu/mmu_32.c | 5 -----
arch/arm/cpu/mmu_64.c | 5 -----
3 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/arch/arm/cpu/mmu-common.c b/arch/arm/cpu/mmu-common.c
index 1770c56b6eea..46982b00b7c5 100644
--- a/arch/arm/cpu/mmu-common.c
+++ b/arch/arm/cpu/mmu-common.c
@@ -74,6 +74,11 @@ void dma_free_coherent(struct device *dev,
free(mem);
}
+void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_handle)
+{
+ return dma_alloc_map(dev, size, dma_handle, MAP_WRITECOMBINE);
+}
+
void zero_page_access(void)
{
remap_range(0x0, PAGE_SIZE, MAP_CACHED);
diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 7cf04ea9412a..63c412873ec8 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -684,11 +684,6 @@ void mmu_disable(void)
__mmu_cache_off();
}
-void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_handle)
-{
- return dma_alloc_map(dev, size, dma_handle, MAP_WRITECOMBINE);
-}
-
void mmu_early_enable(unsigned long membase, unsigned long memsize, unsigned long barebox_start)
{
uint32_t *ttb = (uint32_t *)arm_mem_ttb(membase + memsize);
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index edb1b34aca7f..f32cd9a0ac1a 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -373,11 +373,6 @@ void dma_flush_range(void *ptr, size_t size)
v8_flush_dcache_range(start, end);
}
-void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_handle)
-{
- return dma_alloc_map(dev, size, dma_handle, MAP_WRITECOMBINE);
-}
-
static void early_init_range(size_t total_level0_tables)
{
uint64_t *ttb = get_ttb();
--
2.39.5
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
` (21 preceding siblings ...)
2025-08-06 12:37 ` [PATCH 22/22] ARM: mmu: define dma_alloc_writecombine in common code Ahmad Fatoum
@ 2025-08-07 7:24 ` Sascha Hauer
22 siblings, 0 replies; 24+ messages in thread
From: Sascha Hauer @ 2025-08-07 7:24 UTC (permalink / raw)
To: barebox, Ahmad Fatoum
On Wed, 06 Aug 2025 14:36:52 +0200, Ahmad Fatoum wrote:
> There is duplication and subtle differences between the 32-bit and
> 64-bit MMU code for historical reasons.
>
> Let's refactor the code for more similarity, implement
> flush_cacheable_pages for 32-bit and prepare for observing
> break-before-make requirements.
>
> [...]
Applied, thanks!
[01/22] ARM: mmu: introduce new maptype_t type
https://git.pengutronix.de/cgit/barebox/commit/?id=9f5924e901ab (link may not be stable)
[02/22] ARM: mmu: compare only lowest 16 bits for map type
https://git.pengutronix.de/cgit/barebox/commit/?id=e37b892aa558 (link may not be stable)
[03/22] ARM: mmu: prefix pre-MMU functions with early_
https://git.pengutronix.de/cgit/barebox/commit/?id=7f9cb083ad6f (link may not be stable)
[04/22] ARM: mmu: panic when alloc_pte fails
https://git.pengutronix.de/cgit/barebox/commit/?id=17a1d4b2a5f8 (link may not be stable)
[05/22] ARM: mmu32: introduce new mmu_addr_t type
https://git.pengutronix.de/cgit/barebox/commit/?id=c6c28667495e (link may not be stable)
[06/22] ARM: mmu: provide zero page control in PBL
https://git.pengutronix.de/cgit/barebox/commit/?id=bf0684296ba8 (link may not be stable)
[07/22] ARM: mmu: print map type as string
https://git.pengutronix.de/cgit/barebox/commit/?id=769c49778aba (link may not be stable)
[08/22] ARM: mmu64: rename create_sections to __arch_remap_range
https://git.pengutronix.de/cgit/barebox/commit/?id=9d86d4da477f (link may not be stable)
[09/22] ARM: mmu: move get_pte_attrs call into __arch_remap_range
https://git.pengutronix.de/cgit/barebox/commit/?id=48b88292bb23 (link may not be stable)
[10/22] ARM: mmu64: print debug message in __arch_remap_range
https://git.pengutronix.de/cgit/barebox/commit/?id=5fc230ccd0a4 (link may not be stable)
[11/22] ARM: mmu: make force_pages a maptype_t flag
https://git.pengutronix.de/cgit/barebox/commit/?id=220f91ac1e05 (link may not be stable)
[12/22] ARM: mmu64: move granule_size to the top of the file
https://git.pengutronix.de/cgit/barebox/commit/?id=7f43d7349272 (link may not be stable)
[13/22] ARM: mmu64: fix benign off-by-one in flush_cacheable_pages
https://git.pengutronix.de/cgit/barebox/commit/?id=3795a2fa2d27 (link may not be stable)
[14/22] ARM: mmu64: make flush_cacheable_pages less 64-bit dependent
https://git.pengutronix.de/cgit/barebox/commit/?id=beded95c1b90 (link may not be stable)
[15/22] ARM: mmu64: allow asserting last level page in __find_pte
https://git.pengutronix.de/cgit/barebox/commit/?id=b1fedd998046 (link may not be stable)
[16/22] ARM: mmu64: rename __find_pte to find_pte
https://git.pengutronix.de/cgit/barebox/commit/?id=3c5922e8b006 (link may not be stable)
[17/22] ARM: mmu32: rework find_pte to have ARM64 find_pte semantics
https://git.pengutronix.de/cgit/barebox/commit/?id=f360ca213a76 (link may not be stable)
[18/22] ARM: mmu64: factor out flush_cacheable_pages for reusability
https://git.pengutronix.de/cgit/barebox/commit/?id=8a2811a873ae (link may not be stable)
[19/22] ARM: mmu32: flush only cacheable pages on remap
https://git.pengutronix.de/cgit/barebox/commit/?id=97cc16bf21c4 (link may not be stable)
[20/22] ARM: mmu32: factor out set_pte_range helper
https://git.pengutronix.de/cgit/barebox/commit/?id=c24e4dd7e9cf (link may not be stable)
[21/22] ARM: mmu64: factor out set_pte_range helper
https://git.pengutronix.de/cgit/barebox/commit/?id=4564af5df5d9 (link may not be stable)
[22/22] ARM: mmu: define dma_alloc_writecombine in common code
https://git.pengutronix.de/cgit/barebox/commit/?id=fa95c4e2088b (link may not be stable)
Best regards,
--
Sascha Hauer <s.hauer@pengutronix.de>
^ permalink raw reply [flat|nested] 24+ messages in thread
Thread overview: 24+ messages
2025-08-06 12:36 [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 01/22] ARM: mmu: introduce new maptype_t type Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 02/22] ARM: mmu: compare only lowest 16 bits for map type Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 03/22] ARM: mmu: prefix pre-MMU functions with early_ Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 04/22] ARM: mmu: panic when alloc_pte fails Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 05/22] ARM: mmu32: introduce new mmu_addr_t type Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 06/22] ARM: mmu: provide zero page control in PBL Ahmad Fatoum
2025-08-06 12:36 ` [PATCH 07/22] ARM: mmu: print map type as string Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 08/22] ARM: mmu64: rename create_sections to __arch_remap_range Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 09/22] ARM: mmu: move get_pte_attrs call into __arch_remap_range Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 10/22] ARM: mmu64: print debug message in __arch_remap_range Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 11/22] ARM: mmu: make force_pages a maptype_t flag Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 12/22] ARM: mmu64: move granule_size to the top of the file Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 13/22] ARM: mmu64: fix benign off-by-one in flush_cacheable_pages Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 14/22] ARM: mmu64: make flush_cacheable_pages less 64-bit dependent Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 15/22] ARM: mmu64: allow asserting last level page in __find_pte Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 16/22] ARM: mmu64: rename __find_pte to find_pte Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 17/22] ARM: mmu32: rework find_pte to have ARM64 find_pte semantics Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 18/22] ARM: mmu64: factor out flush_cacheable_pages for reusability Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 19/22] ARM: mmu32: flush only cacheable pages on remap Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 20/22] ARM: mmu32: factor out set_pte_range helper Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 21/22] ARM: mmu64: " Ahmad Fatoum
2025-08-06 12:37 ` [PATCH 22/22] ARM: mmu: define dma_alloc_writecombine in common code Ahmad Fatoum
2025-08-07 7:24 ` [PATCH 00/22] ARM: mmu: refactor 32-bit and 64-bit code Sascha Hauer