* [PATCH] cleanup arm startup a bit
@ 2012-10-05 16:45 Sascha Hauer
  2012-10-05 16:45 ` [PATCH 1/4] ARM: add assembly function for setting up C environment Sascha Hauer
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Sascha Hauer @ 2012-10-05 16:45 UTC (permalink / raw)
  To: barebox
Some cleanups to the arm start files.
Sascha
----------------------------------------------------------------
Sascha Hauer (4):
      ARM: add assembly function for setting up C environment
      introduce region_overlap() function
      ARM: cleanup piggydata copy check
      ARM: simplify start.c
 arch/arm/cpu/Makefile              |    4 +--
 arch/arm/cpu/setupc.S              |   34 ++++++++++++++++++++++++
 arch/arm/cpu/start-pbl.c           |   51 +++++++++---------------------------
 arch/arm/cpu/start.c               |   41 +++++------------------------
 arch/arm/include/asm/barebox-arm.h |    2 ++
 include/common.h                   |   13 +++++++++
 6 files changed, 70 insertions(+), 75 deletions(-)
 create mode 100644 arch/arm/cpu/setupc.S
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* [PATCH 1/4] ARM: add assembly function for setting up C environment
  2012-10-05 16:45 [PATCH] cleanup arm startup a bit Sascha Hauer
@ 2012-10-05 16:45 ` Sascha Hauer
  2012-10-05 16:45 ` [PATCH 2/4] introduce region_overlap() function Sascha Hauer
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Sascha Hauer @ 2012-10-05 16:45 UTC (permalink / raw)
  To: barebox
Sometimes Assembler beats C. In this case a small assembler
function called without parameters can:
- copy a binary to its link address
- clear the bss
- return to the same position in the copied binary
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/cpu/Makefile              |    4 ++--
 arch/arm/cpu/setupc.S              |   34 ++++++++++++++++++++++++++++++++++
 arch/arm/cpu/start-pbl.c           |   19 ++-----------------
 arch/arm/cpu/start.c               |   23 +++++------------------
 arch/arm/include/asm/barebox-arm.h |    2 ++
 5 files changed, 45 insertions(+), 37 deletions(-)
 create mode 100644 arch/arm/cpu/setupc.S
diff --git a/arch/arm/cpu/Makefile b/arch/arm/cpu/Makefile
index f7ab276..b3122c8 100644
--- a/arch/arm/cpu/Makefile
+++ b/arch/arm/cpu/Makefile
@@ -1,7 +1,7 @@
 obj-y += cpu.o
 obj-$(CONFIG_ARM_EXCEPTIONS) += exceptions.o
 obj-$(CONFIG_ARM_EXCEPTIONS) += interrupts.o
-obj-y += start.o
+obj-y += start.o setupc.o
 
 #
 # Any variants can be called as start-armxyz.S
@@ -19,4 +19,4 @@ obj-$(CONFIG_CPU_32v7) += cache-armv7.o
 pbl-$(CONFIG_CPU_32v7) += cache-armv7.o
 obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o
 
-pbl-y += start-pbl.o
+pbl-y += start-pbl.o setupc.o
diff --git a/arch/arm/cpu/setupc.S b/arch/arm/cpu/setupc.S
new file mode 100644
index 0000000..9a8d54c
--- /dev/null
+++ b/arch/arm/cpu/setupc.S
@@ -0,0 +1,34 @@
+#include <linux/linkage.h>
+
+.section .text.setupc
+
+/*
+ * setup_c: copy binary to link address, clear bss and
+ * continue executing at new address.
+ *
+ * This function does not return to the address it is
+ * called from, but to the same location in the copied
+ * binary.
+ */
+ENTRY(setup_c)
+	push	{r4, r5}
+	mov	r5, lr
+	bl	get_runtime_offset
+	subs	r4, r0, #0
+	beq	1f			/* skip memcpy if already at correct address */
+	ldr	r0,=_text
+	ldr	r2,=__bss_start
+	sub	r2, r2, r0
+	sub	r1, r0, r4
+	bl	memcpy			/* memcpy(_text, _text - offset, __bss_start - _text) */
+1:	ldr	r0, =__bss_start
+	mov	r1, #0
+	ldr	r2, =__bss_stop
+	sub	r2, r2, r0
+	bl	memset			/* clear bss */
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0	/* flush icache */
+	add	lr, r5, r4		/* adjust return address to new location */
+	pop	{r4, r5}
+	mov	pc, lr
+ENDPROC(setup_c)
diff --git a/arch/arm/cpu/start-pbl.c b/arch/arm/cpu/start-pbl.c
index 0467dfe..1298ca0 100644
--- a/arch/arm/cpu/start-pbl.c
+++ b/arch/arm/cpu/start-pbl.c
@@ -208,23 +208,8 @@ copy_piggy_link:
 	pg_start = (uint32_t)&input_data;
 
 copy_link:
-	/* relocate to link address if necessary */
-	if (offset)
-		memcpy((void *)_text, (void *)(_text - offset),
-				__bss_start - _text);
 
-	/* clear bss */
-	memset(__bss_start, 0, __bss_stop - __bss_start);
+	setup_c();
 
-	flush_icache();
-
-	r = (unsigned int)&barebox_uncompress;
-	/* call barebox_uncompress with its absolute address */
-	__asm__ __volatile__(
-		"mov r0, %1\n"
-		"mov r1, %2\n"
-		"mov pc, %0\n"
-		:
-		: "r"(r), "r"(pg_start), "r"(pg_len)
-		: "r0", "r1");
+	barebox_uncompress((void *)pg_start, pg_len);
 }
diff --git a/arch/arm/cpu/start.c b/arch/arm/cpu/start.c
index 8676267..7e0a29a 100644
--- a/arch/arm/cpu/start.c
+++ b/arch/arm/cpu/start.c
@@ -37,8 +37,8 @@ void __naked __section(.text_entry) start(void)
 	/* Setup the stack */
 	r = STACK_BASE + STACK_SIZE - 16;
 	__asm__ __volatile__("mov sp, %0" : : "r"(r));
-	/* clear bss */
-	memset(__bss_start, 0, __bss_stop - __bss_start);
+
+	setup_c();
 
 	start_barebox();
 }
@@ -70,27 +70,14 @@ void __naked __bare_init reset(void)
  */
 void __naked board_init_lowlevel_return(void)
 {
-	uint32_t r, offset;
+	uint32_t r;
 
 	/* Setup the stack */
 	r = STACK_BASE + STACK_SIZE - 16;
 	__asm__ __volatile__("mov sp, %0" : : "r"(r));
 
-	/* Get offset between linked address and runtime address */
-	offset = get_runtime_offset();
-
-	/* relocate to link address if necessary */
-	if (offset)
-		memcpy((void *)_text, (void *)(_text - offset),
-				__bss_start - _text);
-
-	/* clear bss */
-	memset(__bss_start, 0, __bss_stop - __bss_start);
+	setup_c();
 
-	flush_icache();
-
-	/* call start_barebox with its absolute address */
-	r = (unsigned int)&start_barebox;
-	__asm__ __volatile__("mov pc, %0" : : "r"(r));
+	start_barebox();
 }
 #endif
diff --git a/arch/arm/include/asm/barebox-arm.h b/arch/arm/include/asm/barebox-arm.h
index 9e17b4f..993130d 100644
--- a/arch/arm/include/asm/barebox-arm.h
+++ b/arch/arm/include/asm/barebox-arm.h
@@ -38,4 +38,6 @@ void board_init_lowlevel(void);
 void board_init_lowlevel_return(void);
 uint32_t get_runtime_offset(void);
 
+void setup_c(void);
+
 #endif	/* _BAREBOX_ARM_H_ */
-- 
1.7.10.4
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* [PATCH 2/4] introduce region_overlap() function
  2012-10-05 16:45 [PATCH] cleanup arm startup a bit Sascha Hauer
  2012-10-05 16:45 ` [PATCH 1/4] ARM: add assembly function for setting up C environment Sascha Hauer
@ 2012-10-05 16:45 ` Sascha Hauer
  2012-10-05 19:55   ` Robert Jarzmik
  2012-10-05 16:45 ` [PATCH 3/4] ARM: cleanup piggydata copy check Sascha Hauer
  2012-10-05 16:45 ` [PATCH 4/4] ARM: simplify start.c Sascha Hauer
  3 siblings, 1 reply; 9+ messages in thread
From: Sascha Hauer @ 2012-10-05 16:45 UTC (permalink / raw)
  To: barebox
To check if two regions overlap
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 include/common.h |   13 +++++++++++++
 1 file changed, 13 insertions(+)
diff --git a/include/common.h b/include/common.h
index c1f44b4..e30774a 100644
--- a/include/common.h
+++ b/include/common.h
@@ -256,4 +256,17 @@ static inline void barebox_banner(void) {}
 		(__x < 0) ? -__x : __x;         \
 	})
 
+/*
+ * Check if two regions overlap. returns true if they do, false otherwise
+ */
+static inline bool region_overlap(unsigned long starta, unsigned long lena,
+		unsigned long startb, unsigned long lenb)
+{
+	if (starta + lena <= startb)
+		return 0;
+	if (startb + lenb <= starta)
+		return 0;
+	return 1;
+}
+
 #endif	/* __COMMON_H_ */
-- 
1.7.10.4
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* [PATCH 3/4] ARM: cleanup piggydata copy check
  2012-10-05 16:45 [PATCH] cleanup arm startup a bit Sascha Hauer
  2012-10-05 16:45 ` [PATCH 1/4] ARM: add assembly function for setting up C environment Sascha Hauer
  2012-10-05 16:45 ` [PATCH 2/4] introduce region_overlap() function Sascha Hauer
@ 2012-10-05 16:45 ` Sascha Hauer
  2012-10-05 16:45 ` [PATCH 4/4] ARM: simplify start.c Sascha Hauer
  3 siblings, 0 replies; 9+ messages in thread
From: Sascha Hauer @ 2012-10-05 16:45 UTC (permalink / raw)
  To: barebox
Replace to gotos with a single if().
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/cpu/start-pbl.c |   28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)
diff --git a/arch/arm/cpu/start-pbl.c b/arch/arm/cpu/start-pbl.c
index 1298ca0..29d82e5 100644
--- a/arch/arm/cpu/start-pbl.c
+++ b/arch/arm/cpu/start-pbl.c
@@ -188,26 +188,14 @@ void __naked board_init_lowlevel_return(void)
 	pg_end = (uint32_t)&input_data_end - offset;
 	pg_len = pg_end - pg_start;
 
-	if (IS_ENABLED(CONFIG_PBL_FORCE_PIGGYDATA_COPY))
-		goto copy_piggy_link;
-
-	/*
-	 * Check if the piggydata binary will be overwritten
-	 * by the uncompressed binary or by the pbl relocation
-	 */
-	if (!offset ||
-	    !((pg_start >= TEXT_BASE && pg_start < TEXT_BASE + pg_len * 4) ||
-	      ((uint32_t)_text >= pg_start && (uint32_t)_text <= pg_end)))
-		goto copy_link;
-
-copy_piggy_link:
-	/*
-	 * copy piggydata binary to its link address
-	 */
-	memcpy(&input_data, (void *)pg_start, pg_len);
-	pg_start = (uint32_t)&input_data;
-
-copy_link:
+	if (offset && (IS_ENABLED(CONFIG_PBL_FORCE_PIGGYDATA_COPY) ||
+				region_overlap(pg_start, pg_len, TEXT_BASE, pg_len * 4))) {
+		/*
+		 * copy piggydata binary to its link address
+		 */
+		memcpy(&input_data, (void *)pg_start, pg_len);
+		pg_start = (uint32_t)&input_data;
+	}
 
 	setup_c();
 
-- 
1.7.10.4
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* [PATCH 4/4] ARM: simplify start.c
  2012-10-05 16:45 [PATCH] cleanup arm startup a bit Sascha Hauer
                   ` (2 preceding siblings ...)
  2012-10-05 16:45 ` [PATCH 3/4] ARM: cleanup piggydata copy check Sascha Hauer
@ 2012-10-05 16:45 ` Sascha Hauer
  3 siblings, 0 replies; 9+ messages in thread
From: Sascha Hauer @ 2012-10-05 16:45 UTC (permalink / raw)
  To: barebox
start() for the PBL case is a duplicate of board_init_lowlevel_return().
Instead of duplicating it just call it.
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/cpu/start.c |   22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)
diff --git a/arch/arm/cpu/start.c b/arch/arm/cpu/start.c
index 7e0a29a..7e72eec 100644
--- a/arch/arm/cpu/start.c
+++ b/arch/arm/cpu/start.c
@@ -24,32 +24,19 @@
 #include <asm-generic/memory_layout.h>
 #include <asm/sections.h>
 #include <asm/cache.h>
+#include <memory.h>
 
-#ifdef CONFIG_PBL_IMAGE
 /*
  * First function in the uncompressed image. We get here from
  * the pbl.
  */
 void __naked __section(.text_entry) start(void)
 {
-	u32 r;
-
-	/* Setup the stack */
-	r = STACK_BASE + STACK_SIZE - 16;
-	__asm__ __volatile__("mov sp, %0" : : "r"(r));
-
-	setup_c();
-
-	start_barebox();
-}
+#ifdef CONFIG_PBL_IMAGE
+	board_init_lowlevel_return();
 #else
-
-/*
- * First function in the image without pbl support
- */
-void __naked __section(.text_entry) start(void)
-{
 	barebox_arm_head();
+#endif
 }
 
 /*
@@ -80,4 +67,3 @@ void __naked board_init_lowlevel_return(void)
 
 	start_barebox();
 }
-#endif
-- 
1.7.10.4
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] introduce region_overlap() function
  2012-10-05 16:45 ` [PATCH 2/4] introduce region_overlap() function Sascha Hauer
@ 2012-10-05 19:55   ` Robert Jarzmik
  2012-10-05 21:33     ` Sascha Hauer
  0 siblings, 1 reply; 9+ messages in thread
From: Robert Jarzmik @ 2012-10-05 19:55 UTC (permalink / raw)
  To: Sascha Hauer; +Cc: barebox
Sascha Hauer <s.hauer@pengutronix.de> writes:
> To check if two regions overlap
>
> Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> ---
>  include/common.h |   13 +++++++++++++
>  1 file changed, 13 insertions(+)
>
> diff --git a/include/common.h b/include/common.h
> index c1f44b4..e30774a 100644
> --- a/include/common.h
> +++ b/include/common.h
> @@ -256,4 +256,17 @@ static inline void barebox_banner(void) {}
>  		(__x < 0) ? -__x : __x;         \
>  	})
>  
> +/*
> + * Check if two regions overlap. returns true if they do, false otherwise
> + */
> +static inline bool region_overlap(unsigned long starta, unsigned long lena,
> +		unsigned long startb, unsigned long lenb)
> +{
> +	if (starta + lena <= startb)
> +		return 0;
> +	if (startb + lenb <= starta)
> +		return 0;
> +	return 1;
> +}
> +
>  #endif	/* __COMMON_H_ */
Or if you look for perfomance (I presume not in barebox) :
static inline bool region_overlap(unsigned long starta, unsigned long lena,
		unsigned long startb, unsigned long lenb)
{
        return starta <= startb + lenb && starta + lena >= startb;
}
It's a bit more obfuscated, but performance wise no branch prediction :)
Cheers.
-- 
Robert
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] introduce region_overlap() function
  2012-10-05 19:55   ` Robert Jarzmik
@ 2012-10-05 21:33     ` Sascha Hauer
  2012-10-06 19:44       ` Robert Jarzmik
  2012-10-07  6:59       ` Antony Pavlov
  0 siblings, 2 replies; 9+ messages in thread
From: Sascha Hauer @ 2012-10-05 21:33 UTC (permalink / raw)
  To: Robert Jarzmik; +Cc: barebox
On Fri, Oct 05, 2012 at 09:55:04PM +0200, Robert Jarzmik wrote:
> Sascha Hauer <s.hauer@pengutronix.de> writes:
> 
> > To check if two regions overlap
> >
> > Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> > ---
> >  include/common.h |   13 +++++++++++++
> >  1 file changed, 13 insertions(+)
> >
> > diff --git a/include/common.h b/include/common.h
> > index c1f44b4..e30774a 100644
> > --- a/include/common.h
> > +++ b/include/common.h
> > @@ -256,4 +256,17 @@ static inline void barebox_banner(void) {}
> >  		(__x < 0) ? -__x : __x;         \
> >  	})
> >  
> > +/*
> > + * Check if two regions overlap. returns true if they do, false otherwise
> > + */
> > +static inline bool region_overlap(unsigned long starta, unsigned long lena,
> > +		unsigned long startb, unsigned long lenb)
> > +{
> > +	if (starta + lena <= startb)
> > +		return 0;
> > +	if (startb + lenb <= starta)
> > +		return 0;
> > +	return 1;
> > +}
> > +
> >  #endif	/* __COMMON_H_ */
> 
> Or if you look for perfomance (I presume not in barebox) :
> static inline bool region_overlap(unsigned long starta, unsigned long lena,
> 		unsigned long startb, unsigned long lenb)
> {
>         return starta <= startb + lenb && starta + lena >= startb;
> }
> 
> It's a bit more obfuscated, but performance wise no branch prediction :)
You made me curious. I tried to compile both and here is the result on
ARM (I swapped the arguments left and right of the &&):
00025000 <_region_overlap>:
   25000:       e0811000        add     r1, r1, r0
   25004:       e1510002        cmp     r1, r2
   25008:       9a000004        bls     25020 <_region_overlap+0x20>
   2500c:       e0832002        add     r2, r3, r2
   25010:       e1520000        cmp     r2, r0
   25014:       93a00000        movls   r0, #0
   25018:       83a00001        movhi   r0, #1
   2501c:       e12fff1e        bx      lr
   25020:       e3a00000        mov     r0, #0
   25024:       e12fff1e        bx      lr
00025000 <__region_overlap>:
   25000:       e0811000        add     r1, r1, r0
   25004:       e1510002        cmp     r1, r2
   25008:       3a000004        bcc     25020 <__region_overlap+0x20>
   2500c:       e0832002        add     r2, r3, r2
   25010:       e1500002        cmp     r0, r2
   25014:       83a00000        movhi   r0, #0
   25018:       93a00001        movls   r0, #1
   2501c:       e12fff1e        bx      lr
   25020:       e3a00000        mov     r0, #0
   25024:       e12fff1e        bx      lr
Maybe gcc isn't so clever on other architectures, I don't know ;)
Sascha
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] introduce region_overlap() function
  2012-10-05 21:33     ` Sascha Hauer
@ 2012-10-06 19:44       ` Robert Jarzmik
  2012-10-07  6:59       ` Antony Pavlov
  1 sibling, 0 replies; 9+ messages in thread
From: Robert Jarzmik @ 2012-10-06 19:44 UTC (permalink / raw)
  To: Sascha Hauer; +Cc: barebox
Sascha Hauer <s.hauer@pengutronix.de> writes:
> You made me curious. I tried to compile both and here is the result on
> ARM (I swapped the arguments left and right of the &&):
>
...zip...
>
> Maybe gcc isn't so clever on other architectures, I don't know ;)
You're right.
Same conclusion on x86 with gcc 4.7. I had tried that a long time ago (maybe 10
years or so) on x86, benchmarking both pure C and Java on this very function (it
was for aircrafts timeslots conflicts discovery).
At that time, the "startA <= endB && endA >= startB" was fastest. So compilation
technique have improved enough to write maintainable code after all :)
Cheers.
-- 
Robert
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] introduce region_overlap() function
  2012-10-05 21:33     ` Sascha Hauer
  2012-10-06 19:44       ` Robert Jarzmik
@ 2012-10-07  6:59       ` Antony Pavlov
  1 sibling, 0 replies; 9+ messages in thread
From: Antony Pavlov @ 2012-10-07  6:59 UTC (permalink / raw)
  To: Sascha Hauer; +Cc: barebox
On 6 October 2012 01:33, Sascha Hauer <s.hauer@pengutronix.de> wrote:
> On Fri, Oct 05, 2012 at 09:55:04PM +0200, Robert Jarzmik wrote:
>> Sascha Hauer <s.hauer@pengutronix.de> writes:
>>
>> > To check if two regions overlap
>> >
>> > Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
>> > ---
>> >  include/common.h |   13 +++++++++++++
>> >  1 file changed, 13 insertions(+)
>> >
>> > diff --git a/include/common.h b/include/common.h
>> > index c1f44b4..e30774a 100644
>> > --- a/include/common.h
>> > +++ b/include/common.h
>> > @@ -256,4 +256,17 @@ static inline void barebox_banner(void) {}
>> >             (__x < 0) ? -__x : __x;         \
>> >     })
>> >
>> > +/*
>> > + * Check if two regions overlap. returns true if they do, false otherwise
>> > + */
>> > +static inline bool region_overlap(unsigned long starta, unsigned long lena,
>> > +           unsigned long startb, unsigned long lenb)
>> > +{
>> > +   if (starta + lena <= startb)
>> > +           return 0;
>> > +   if (startb + lenb <= starta)
>> > +           return 0;
>> > +   return 1;
>> > +}
>> > +
>> >  #endif     /* __COMMON_H_ */
>>
>> Or if you look for perfomance (I presume not in barebox) :
>> static inline bool region_overlap(unsigned long starta, unsigned long lena,
>>               unsigned long startb, unsigned long lenb)
>> {
>>         return starta <= startb + lenb && starta + lena >= startb;
>> }
>>
>> It's a bit more obfuscated, but performance wise no branch prediction :)
>
> You made me curious. I tried to compile both and here is the result on
> ARM (I swapped the arguments left and right of the &&):
>
> 00025000 <_region_overlap>:
>    25000:       e0811000        add     r1, r1, r0
>    25004:       e1510002        cmp     r1, r2
>    25008:       9a000004        bls     25020 <_region_overlap+0x20>
>    2500c:       e0832002        add     r2, r3, r2
>    25010:       e1520000        cmp     r2, r0
>    25014:       93a00000        movls   r0, #0
>    25018:       83a00001        movhi   r0, #1
>    2501c:       e12fff1e        bx      lr
>    25020:       e3a00000        mov     r0, #0
>    25024:       e12fff1e        bx      lr
>
> 00025000 <__region_overlap>:
>    25000:       e0811000        add     r1, r1, r0
>    25004:       e1510002        cmp     r1, r2
>    25008:       3a000004        bcc     25020 <__region_overlap+0x20>
>    2500c:       e0832002        add     r2, r3, r2
>    25010:       e1500002        cmp     r0, r2
>    25014:       83a00000        movhi   r0, #0
>    25018:       93a00001        movls   r0, #1
>    2501c:       e12fff1e        bx      lr
>    25020:       e3a00000        mov     r0, #0
>    25024:       e12fff1e        bx      lr
>
> Maybe gcc isn't so clever on other architectures, I don't know ;)
You made me curious too.
I compiled this piece of code for MIPS:
--- code ---
#include <stdbool.h>
bool _region_overlap(unsigned long starta, unsigned long lena,
unsigned long startb, unsigned long lenb)
{
        if (starta + lena <= startb)
                return 0;
        if (startb + lenb <= starta)
                return 0;
        return 1;
}
bool __region_overlap(unsigned long starta, unsigned long lena,
unsigned long startb, unsigned long lenb)
{
        return starta <= startb + lenb && starta + lena >= startb;
}
--- /code ---
I used gcc 4.6.2 with the '-O2' option.
Here is the result:
00000000 <_region_overlap>:
   0:   00a42821        addu    a1,a1,a0
   4:   00c5282b        sltu    a1,a2,a1
   8:   10a00003        beqz    a1,18 <_region_overlap+0x18>
   c:   00e63021        addu    a2,a3,a2
  10:   03e00008        jr      ra
  14:   0086102b        sltu    v0,a0,a2
  18:   03e00008        jr      ra
  1c:   00001021        move    v0,zero
00000020 <__region_overlap>:
  20:   00e63821        addu    a3,a3,a2
  24:   00e4382b        sltu    a3,a3,a0
  28:   14e00004        bnez    a3,3c <__region_overlap+0x1c>
  2c:   00a42021        addu    a0,a1,a0
  30:   0086302b        sltu    a2,a0,a2
  34:   03e00008        jr      ra
  38:   38c20001        xori    v0,a2,0x1
  3c:   03e00008        jr      ra
  40:   00001021        move    v0,zero
You can see that the shorten obfuscated function (__region_overlap)
has ONE MORE processor instruction!
-- 
Best regards,
  Antony Pavlov
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
^ permalink raw reply	[flat|nested] 9+ messages in thread
end of thread, other threads:[~2012-10-07  6:59 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-05 16:45 [PATCH] cleanup arm startup a bit Sascha Hauer
2012-10-05 16:45 ` [PATCH 1/4] ARM: add assembly function for setting up C environment Sascha Hauer
2012-10-05 16:45 ` [PATCH 2/4] introduce region_overlap() function Sascha Hauer
2012-10-05 19:55   ` Robert Jarzmik
2012-10-05 21:33     ` Sascha Hauer
2012-10-06 19:44       ` Robert Jarzmik
2012-10-07  6:59       ` Antony Pavlov
2012-10-05 16:45 ` [PATCH 3/4] ARM: cleanup piggydata copy check Sascha Hauer
2012-10-05 16:45 ` [PATCH 4/4] ARM: simplify start.c Sascha Hauer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox