mirror of
				https://source.denx.de/u-boot/u-boot.git
				synced 2025-10-31 00:11:51 +01:00 
			
		
		
		
	There is currently a problem that U-Boot can not work on ARMv4
because assembly implementations of memcpy() and some other functions
use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
should be used instead).
A working preprocessor-based solution to this problem is found in
arch/arm/lib/relocate.S. Move it to the "ret" macro in
arch/arm/include/asm/assembler.h and change all "bx lr" code
to "ret lr" in functions that may run on ARMv4. Linux source code
deals with this problem in the same manner.
v1 -> v2:
Comment update. Pointed out by Andre Przywara.
Signed-off-by: Sergei Antonov <saproj@gmail.com>
CC: Samuel Holland <samuel@sholland.org>
CC: Ye Li <ye.li@nxp.com>
CC: Simon Glass <sjg@chromium.org>
CC: Andre Przywara <andre.przywara@arm.com>
CC: Marek Vasut <marex@denx.de>
CC: Sean Anderson <sean.anderson@seco.com>
CC: Tom Rini <trini@konsulko.com>
		
	
			
		
			
				
	
	
		
			428 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			428 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0+ */
 | |
| /*
 | |
|  * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 | |
|  *
 | |
|  * Author: Nicolas Pitre <nico@fluxnic.net>
 | |
|  *   - contributed to gcc-3.4 on Sep 30, 2003
 | |
|  *   - adapted for the Linux kernel on Oct 2, 2003
 | |
|  */
 | |
| /*
 | |
|  * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 | |
|  */
 | |
| 
 | |
| #include <linux/linkage.h>
 | |
| #include <asm/assembler.h>
 | |
| 
 | |
| /*
 | |
|  * U-Boot compatibility bit: define an empty UNWIND() macro, since we
 | |
|  * do not support stack unwinding, and define CONFIG_AEABI to make all
 | |
|  * of the functions available without diverging from Linux code.
 | |
|  */
 | |
| #ifdef __UBOOT__
 | |
| #define UNWIND(x...)
 | |
| #define CONFIG_AEABI
 | |
| #endif
 | |
| 
 | |
| .macro ARM_DIV_BODY dividend, divisor, result, curbit
 | |
| /*
 | |
|  * Unsigned shift-and-subtract division core.
 | |
|  *   dividend, divisor: input registers; both are modified by the macro.
 | |
|  *   result:  register that receives the quotient.
 | |
|  *   curbit:  scratch register holding the quotient bit that matches
 | |
|  *            the current divisor shift.
 | |
|  * The call sites in this file only expand it after ruling out a zero
 | |
|  * divisor, dividend <= divisor and power-of-two divisors.
 | |
|  */
 | |
| #if __LINUX_ARM_ARCH__ >= 5
 | |
| 
 | |
| 	clz	\curbit, \divisor
 | |
| 	clz	\result, \dividend
 | |
| 	sub	\result, \curbit, \result	@ shift = clz(divisor) - clz(dividend)
 | |
| 	mov	\curbit, #1
 | |
| 	mov	\divisor, \divisor, lsl \result	@ line divisor up with dividend
 | |
| 	mov	\curbit, \curbit, lsl \result	@ curbit = 1 << shift
 | |
| 	mov	\result, #0			@ quotient starts at zero
 | |
| 
 | |
| #else
 | |
| 
 | |
| 	@ Initially shift the divisor left 3 bits if possible,
 | |
| 	@ set curbit accordingly.  This allows for curbit to be located
 | |
| 	@ at the left end of each 4 bit nibbles in the division loop
 | |
| 	@ to save one loop in most cases.
 | |
| 	tst	\divisor, #0xe0000000
 | |
| 	moveq	\divisor, \divisor, lsl #3
 | |
| 	moveq	\curbit, #8
 | |
| 	movne	\curbit, #1
 | |
| 
 | |
| 	@ Unless the divisor is very big, shift it up in multiples of
 | |
| 	@ four bits, since this is the amount of unwinding in the main
 | |
| 	@ division loop.  Continue shifting until the divisor is
 | |
| 	@ larger than the dividend.
 | |
| 1:	cmp	\divisor, #0x10000000
 | |
| 	cmplo	\divisor, \dividend
 | |
| 	movlo	\divisor, \divisor, lsl #4
 | |
| 	movlo	\curbit, \curbit, lsl #4
 | |
| 	blo	1b
 | |
| 
 | |
| 	@ For very big divisors, we must shift it a bit at a time, or
 | |
| 	@ we will be in danger of overflowing.
 | |
| 1:	cmp	\divisor, #0x80000000
 | |
| 	cmplo	\divisor, \dividend
 | |
| 	movlo	\divisor, \divisor, lsl #1
 | |
| 	movlo	\curbit, \curbit, lsl #1
 | |
| 	blo	1b
 | |
| 
 | |
| 	mov	\result, #0
 | |
| 
 | |
| #endif
 | |
| 
 | |
| 	@ Division loop: up to four quotient bits are produced per pass,
 | |
| 1:	cmp	\dividend, \divisor
 | |
| 	subhs	\dividend, \dividend, \divisor
 | |
| 	orrhs	\result,   \result,   \curbit
 | |
| 	cmp	\dividend, \divisor,  lsr #1
 | |
| 	subhs	\dividend, \dividend, \divisor, lsr #1
 | |
| 	orrhs	\result,   \result,   \curbit,  lsr #1
 | |
| 	cmp	\dividend, \divisor,  lsr #2
 | |
| 	subhs	\dividend, \dividend, \divisor, lsr #2
 | |
| 	orrhs	\result,   \result,   \curbit,  lsr #2
 | |
| 	cmp	\dividend, \divisor,  lsr #3
 | |
| 	subhs	\dividend, \dividend, \divisor, lsr #3
 | |
| 	orrhs	\result,   \result,   \curbit,  lsr #3
 | |
| 	cmp	\dividend, #0			@ Early termination?
 | |
| 	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
 | |
| 	movne	\divisor,  \divisor, lsr #4	@ only when curbit is still nonzero
 | |
| 	bne	1b
 | |
| 
 | |
| .endm
 | |
| 
 | |
| 
 | |
| .macro ARM_DIV2_ORDER divisor, order
 | |
| /*
 | |
|  * Compute in "order" the bit position of the set bit of "divisor".
 | |
|  * The call sites in this file only expand it after checking that
 | |
|  * divisor & (divisor - 1) is zero, i.e. for power-of-two divisors,
 | |
|  * and then use the result as a right-shift count.
 | |
|  * On pre-ARMv5 builds the divisor register is modified.
 | |
|  */
 | |
| #if __LINUX_ARM_ARCH__ >= 5
 | |
| 
 | |
| 	clz	\order, \divisor
 | |
| 	rsb	\order, \order, #31		@ order = 31 - clz(divisor)
 | |
| 
 | |
| #else
 | |
| 
 | |
| 	cmp	\divisor, #(1 << 16)		@ set bit in the upper half?
 | |
| 	movhs	\divisor, \divisor, lsr #16
 | |
| 	movhs	\order, #16
 | |
| 	movlo	\order, #0
 | |
| 
 | |
| 	cmp	\divisor, #(1 << 8)
 | |
| 	movhs	\divisor, \divisor, lsr #8
 | |
| 	addhs	\order, \order, #8
 | |
| 
 | |
| 	cmp	\divisor, #(1 << 4)
 | |
| 	movhs	\divisor, \divisor, lsr #4
 | |
| 	addhs	\order, \order, #4
 | |
| 
 | |
| 	cmp	\divisor, #(1 << 2)		@ divisor is now below 16
 | |
| 	addhi	\order, \order, #3		@ above 4: add 3
 | |
| 	addls	\order, \order, \divisor, lsr #1	@ 1, 2, 4: add 0, 1, 2
 | |
| 
 | |
| #endif
 | |
| 
 | |
| .endm
 | |
| 
 | |
| 
 | |
| .macro ARM_MOD_BODY dividend, divisor, order, spare
 | |
| /*
 | |
|  * Unsigned modulus core: repeatedly subtracts shifted copies of the
 | |
|  * divisor so that "dividend" ends up holding the remainder.
 | |
|  *   dividend: input, replaced by the remainder.
 | |
|  *   divisor:  input, modified.
 | |
|  *   order:    scratch, number of bit positions the divisor was shifted.
 | |
|  *   spare:    scratch, written only on the ARMv5+ path.
 | |
|  * The call sites in this file only expand it when the dividend is
 | |
|  * larger than a non-zero, non-power-of-two divisor.
 | |
|  */
 | |
| #if __LINUX_ARM_ARCH__ >= 5
 | |
| 
 | |
| 	clz	\order, \divisor
 | |
| 	clz	\spare, \dividend
 | |
| 	sub	\order, \order, \spare		@ order = clz(divisor) - clz(dividend)
 | |
| 	mov	\divisor, \divisor, lsl \order
 | |
| 
 | |
| #else
 | |
| 
 | |
| 	mov	\order, #0
 | |
| 
 | |
| 	@ Unless the divisor is very big, shift it up in multiples of
 | |
| 	@ four bits, since this is the amount of unwinding in the main
 | |
| 	@ division loop.  Continue shifting until the divisor is
 | |
| 	@ larger than the dividend.
 | |
| 1:	cmp	\divisor, #0x10000000
 | |
| 	cmplo	\divisor, \dividend
 | |
| 	movlo	\divisor, \divisor, lsl #4
 | |
| 	addlo	\order, \order, #4
 | |
| 	blo	1b
 | |
| 
 | |
| 	@ For very big divisors, we must shift it a bit at a time, or
 | |
| 	@ we will be in danger of overflowing.
 | |
| 1:	cmp	\divisor, #0x80000000
 | |
| 	cmplo	\divisor, \dividend
 | |
| 	movlo	\divisor, \divisor, lsl #1
 | |
| 	addlo	\order, \order, #1
 | |
| 	blo	1b
 | |
| 
 | |
| #endif
 | |
| 
 | |
| 	@ Perform all needed subtractions to keep only the remainder.
 | |
| 	@ Do comparisons in batch of 4 first.
 | |
| 	subs	\order, \order, #3		@ yes, 3 is intended here
 | |
| 	blt	2f				@ fewer than four comparisons needed
 | |
| 
 | |
| 1:	cmp	\dividend, \divisor
 | |
| 	subhs	\dividend, \dividend, \divisor
 | |
| 	cmp	\dividend, \divisor,  lsr #1
 | |
| 	subhs	\dividend, \dividend, \divisor, lsr #1
 | |
| 	cmp	\dividend, \divisor,  lsr #2
 | |
| 	subhs	\dividend, \dividend, \divisor, lsr #2
 | |
| 	cmp	\dividend, \divisor,  lsr #3
 | |
| 	subhs	\dividend, \dividend, \divisor, lsr #3
 | |
| 	cmp	\dividend, #1			@ lt when dividend is 0: ends the loop
 | |
| 	mov	\divisor, \divisor, lsr #4
 | |
| 	subsge	\order, \order, #4		@ room for another batch of four?
 | |
| 	bge	1b
 | |
| 
 | |
| 	tst	\order, #3			@ low bits clear: no comparison left
 | |
| 	teqne	\dividend, #0			@ otherwise, is dividend already 0?
 | |
| 	beq	5f				@ either way we are done
 | |
| 
 | |
| 	@ Either 1, 2 or 3 comparison/subtractions are left.
 | |
| 2:	cmn	\order, #2			@ compare order with -2
 | |
| 	blt	4f				@ order below -2: one left
 | |
| 	beq	3f				@ order == -2: two left
 | |
| 	cmp	\dividend, \divisor
 | |
| 	subhs	\dividend, \dividend, \divisor
 | |
| 	mov	\divisor,  \divisor,  lsr #1
 | |
| 3:	cmp	\dividend, \divisor
 | |
| 	subhs	\dividend, \dividend, \divisor
 | |
| 	mov	\divisor,  \divisor,  lsr #1
 | |
| 4:	cmp	\dividend, \divisor
 | |
| 	subhs	\dividend, \dividend, \divisor
 | |
| 5:
 | |
| .endm
 | |
| 
 | |
| 
 | |
| .pushsection .text.__udivsi3, "ax"
 | |
| ENTRY(__udivsi3)
 | |
| ENTRY(__aeabi_uidiv)
 | |
| UNWIND(.fnstart)
 | |
| /*
 | |
|  * Unsigned 32-bit division, exported under two names.
 | |
|  *   In:  r0 = dividend, r1 = divisor
 | |
|  *   Out: r0 = quotient
 | |
|  *   r1, r2 and r3 may be overwritten.
 | |
|  * A zero divisor branches to Ldiv0.
 | |
|  */
 | |
| 	subs	r2, r1, #1			@ r2 = divisor - 1
 | |
| 	reteq	lr				@ divisor == 1: r0 is already the quotient
 | |
| 	bcc	Ldiv0				@ borrow: divisor == 0
 | |
| 	cmp	r0, r1
 | |
| 	bls	11f				@ dividend <= divisor
 | |
| 	tst	r1, r2				@ divisor & (divisor - 1)
 | |
| 	beq	12f				@ power of two: shift instead
 | |
| 
 | |
| 	ARM_DIV_BODY r0, r1, r2, r3
 | |
| 
 | |
| 	mov	r0, r2				@ quotient was built in r2
 | |
| 	ret	lr
 | |
| 
 | |
| 11:	moveq	r0, #1				@ dividend == divisor
 | |
| 	movne	r0, #0				@ dividend < divisor
 | |
| 	ret	lr
 | |
| 
 | |
| 12:	ARM_DIV2_ORDER r1, r2
 | |
| 
 | |
| 	mov	r0, r0, lsr r2			@ shift by the bit position of the divisor
 | |
| 	ret	lr
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(__udivsi3)
 | |
| ENDPROC(__aeabi_uidiv)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__umodsi3, "ax"
 | |
| ENTRY(__umodsi3)
 | |
| UNWIND(.fnstart)
 | |
| /*
 | |
|  * Unsigned 32-bit modulus.
 | |
|  *   In:  r0 = dividend, r1 = divisor
 | |
|  *   Out: r0 = remainder
 | |
|  *   r1, r2 and r3 may be overwritten.
 | |
|  * A zero divisor branches to Ldiv0.
 | |
|  */
 | |
| 	subs	r2, r1, #1			@ compare divisor with 1
 | |
| 	bcc	Ldiv0				@ borrow: divisor == 0
 | |
| 	cmpne	r0, r1				@ compare dividend with divisor
 | |
| 	moveq   r0, #0				@ divisor == 1 or dividend == divisor
 | |
| 	tsthi	r1, r2				@ see if divisor is power of 2
 | |
| 	andeq	r0, r0, r2			@ mask with divisor - 1 (r0 may already be 0)
 | |
| 	retls	lr				@ all the simple cases are finished here
 | |
| 
 | |
| 	ARM_MOD_BODY r0, r1, r2, r3
 | |
| 
 | |
| 	ret	lr
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(__umodsi3)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__divsi3, "ax"
 | |
| ENTRY(__divsi3)
 | |
| ENTRY(__aeabi_idiv)
 | |
| UNWIND(.fnstart)
 | |
| /*
 | |
|  * Signed 32-bit division, exported under two names.
 | |
|  *   In:  r0 = dividend, r1 = divisor
 | |
|  *   Out: r0 = quotient
 | |
|  *   r1, r2, r3 and ip may be overwritten.
 | |
|  * The magnitudes are divided as unsigned values and the sign, kept
 | |
|  * in ip as r0 ^ r1, is applied afterwards.
 | |
|  * A zero divisor branches to Ldiv0.
 | |
|  */
 | |
| 	cmp	r1, #0				@ flags for beq/rsbmi below (eor keeps them)
 | |
| 	eor	ip, r0, r1			@ save the sign of the result.
 | |
| 	beq	Ldiv0
 | |
| 	rsbmi	r1, r1, #0			@ loops below use unsigned.
 | |
| 	subs	r2, r1, #1			@ division by 1 or -1 ?
 | |
| 	beq	10f
 | |
| 	movs	r3, r0
 | |
| 	rsbmi	r3, r0, #0			@ positive dividend value
 | |
| 	cmp	r3, r1
 | |
| 	bls	11f				@ magnitude of dividend <= that of divisor
 | |
| 	tst	r1, r2				@ divisor is power of 2 ?
 | |
| 	beq	12f
 | |
| 
 | |
| 	ARM_DIV_BODY r3, r1, r0, r2
 | |
| 
 | |
| 	cmp	ip, #0
 | |
| 	rsbmi	r0, r0, #0			@ negate quotient when the signs differed
 | |
| 	ret	lr
 | |
| 
 | |
| 10:	teq	ip, r0				@ same sign ?
 | |
| 	rsbmi	r0, r0, #0			@ ip ^ r0 negative: divisor was -1
 | |
| 	ret	lr
 | |
| 
 | |
| 11:	movlo	r0, #0				@ smaller magnitude: quotient is 0
 | |
| 	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 from the sign
 | |
| 	orreq	r0, r0, #1			@ ... turned into 1 or -1
 | |
| 	ret	lr
 | |
| 
 | |
| 12:	ARM_DIV2_ORDER r1, r2
 | |
| 
 | |
| 	cmp	ip, #0
 | |
| 	mov	r0, r3, lsr r2			@ shift the positive dividend value
 | |
| 	rsbmi	r0, r0, #0			@ negate when the signs differed
 | |
| 	ret	lr
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(__divsi3)
 | |
| ENDPROC(__aeabi_idiv)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__modsi3, "ax"
 | |
| ENTRY(__modsi3)
 | |
| UNWIND(.fnstart)
 | |
| /*
 | |
|  * Signed 32-bit modulus.
 | |
|  *   In:  r0 = dividend, r1 = divisor
 | |
|  *   Out: r0 = remainder, negated when the dividend was negative
 | |
|  *   r1, r2, r3 and ip may be overwritten.
 | |
|  * A zero divisor branches to Ldiv0.
 | |
|  */
 | |
| 	cmp	r1, #0
 | |
| 	beq	Ldiv0
 | |
| 	rsbmi	r1, r1, #0			@ loops below use unsigned.
 | |
| 	movs	ip, r0				@ preserve sign of dividend
 | |
| 	rsbmi	r0, r0, #0			@ if negative make positive
 | |
| 	subs	r2, r1, #1			@ compare divisor with 1
 | |
| 	cmpne	r0, r1				@ compare dividend with divisor
 | |
| 	moveq	r0, #0				@ divisor magnitude 1, or equal magnitudes
 | |
| 	tsthi	r1, r2				@ see if divisor is power of 2
 | |
| 	andeq	r0, r0, r2			@ mask with divisor - 1 (r0 may already be 0)
 | |
| 	bls	10f				@ simple cases skip the subtraction loop
 | |
| 
 | |
| 	ARM_MOD_BODY r0, r1, r2, r3
 | |
| 
 | |
| 10:	cmp	ip, #0
 | |
| 	rsbmi	r0, r0, #0			@ give the remainder the dividend sign
 | |
| 	ret	lr
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(__modsi3)
 | |
| .popsection
 | |
| 
 | |
| #ifdef CONFIG_AEABI
 | |
| 
 | |
| .pushsection .text.__aeabi_uidivmod, "ax"
 | |
| ENTRY(__aeabi_uidivmod)
 | |
| UNWIND(.fnstart)
 | |
| UNWIND(.save {r0, r1, ip, lr}	)
 | |
| /*
 | |
|  * Unsigned division returning quotient and remainder together.
 | |
|  *   In:  r0 = dividend, r1 = divisor
 | |
|  *   Out: r0 = quotient (from __aeabi_uidiv), r1 = remainder
 | |
|  *   r2 and r3 are overwritten.
 | |
|  */
 | |
| 	stmfd	sp!, {r0, r1, ip, lr}		@ keep the original operands
 | |
| 	bl	__aeabi_uidiv
 | |
| 	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
 | |
| 	mul	r3, r0, r2			@ r3 = quotient * divisor
 | |
| 	sub	r1, r1, r3			@ remainder = dividend - r3
 | |
| 	ret	lr
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(__aeabi_uidivmod)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__aeabi_idivmod, "ax"
 | |
| ENTRY(__aeabi_idivmod)
 | |
| UNWIND(.fnstart)
 | |
| UNWIND(.save {r0, r1, ip, lr}	)
 | |
| /*
 | |
|  * Signed division returning quotient and remainder together.
 | |
|  *   In:  r0 = dividend, r1 = divisor
 | |
|  *   Out: r0 = quotient (from __aeabi_idiv), r1 = remainder
 | |
|  *   r2 and r3 are overwritten.
 | |
|  * Placed in its own .text.__aeabi_idivmod section, matching the
 | |
|  * one-section-per-routine layout used by the rest of this file.
 | |
|  */
 | |
| 	stmfd	sp!, {r0, r1, ip, lr}		@ keep the original operands
 | |
| 	bl	__aeabi_idiv
 | |
| 	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
 | |
| 	mul	r3, r0, r2			@ r3 = quotient * divisor
 | |
| 	sub	r1, r1, r3			@ remainder = dividend - r3
 | |
| 	ret	lr
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(__aeabi_idivmod)
 | |
| .popsection
 | |
| 
 | |
| #endif
 | |
| 
 | |
| .pushsection .text.Ldiv0, "ax"
 | |
| Ldiv0:
 | |
| UNWIND(.fnstart)
 | |
| UNWIND(.pad #4)
 | |
| UNWIND(.save {lr})
 | |
| /*
 | |
|  * Division-by-zero path. The division routines in this file branch
 | |
|  * here (without linking), so lr still belongs to their caller.
 | |
|  * Calls __div0, then returns 0 in r0 to that caller.
 | |
|  * NOTE(review): the 8-byte stack adjustment for a 4-byte save is
 | |
|  * presumably there to keep sp 8-byte aligned - confirm.
 | |
|  */
 | |
| 	str	lr, [sp, #-8]!			@ save lr, moving sp down by 8
 | |
| 	bl	__div0
 | |
| 	mov	r0, #0			@ About as wrong as it could be.
 | |
| 	ldr	pc, [sp], #8			@ reload saved lr into pc, restore sp
 | |
| 
 | |
| UNWIND(.fnend)
 | |
| ENDPROC(Ldiv0)
 | |
| .popsection
 | |
| 
 | |
| /* Thumb-1 specialities */
 | |
| #if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
 | |
| .pushsection .text.__gnu_thumb1_case_sqi, "ax"	@ NOTE(review): looks like a switch-table helper, r0 = index - confirm
 | |
| ENTRY(__gnu_thumb1_case_sqi)
 | |
| 	push	{r1}			@ r1 is scratch here and restored below
 | |
| 	mov	r1, lr
 | |
| 	lsrs	r1, r1, #1
 | |
| 	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
 | |
| 	ldrsb	r1, [r1, r0]		@ signed byte found at r1 + r0
 | |
| 	lsls	r1, r1, #1		@ entry * 2
 | |
| 	add	lr, lr, r1		@ move the return address by that amount
 | |
| 	pop	{r1}
 | |
| 	ret	lr
 | |
| ENDPROC(__gnu_thumb1_case_sqi)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__gnu_thumb1_case_uqi, "ax"	@ NOTE(review): looks like a switch-table helper, r0 = index - confirm
 | |
| ENTRY(__gnu_thumb1_case_uqi)
 | |
| 	push	{r1}			@ r1 is scratch here and restored below
 | |
| 	mov	r1, lr
 | |
| 	lsrs	r1, r1, #1
 | |
| 	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
 | |
| 	ldrb	r1, [r1, r0]		@ unsigned byte found at r1 + r0
 | |
| 	lsls	r1, r1, #1		@ entry * 2
 | |
| 	add	lr, lr, r1		@ move the return address by that amount
 | |
| 	pop	{r1}
 | |
| 	ret	lr
 | |
| ENDPROC(__gnu_thumb1_case_uqi)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__gnu_thumb1_case_shi, "ax"	@ NOTE(review): looks like a switch-table helper, r0 = index - confirm
 | |
| ENTRY(__gnu_thumb1_case_shi)
 | |
| 	push	{r0, r1}		@ both are modified here and restored below
 | |
| 	mov	r1, lr
 | |
| 	lsrs	r1, r1, #1
 | |
| 	lsls	r0, r0, #1		@ r0 * 2: byte offset of a halfword entry
 | |
| 	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
 | |
| 	ldrsh	r1, [r1, r0]		@ signed halfword found at r1 + r0
 | |
| 	lsls	r1, r1, #1		@ entry * 2
 | |
| 	add	lr, lr, r1		@ move the return address by that amount
 | |
| 	pop	{r0, r1}
 | |
| 	ret	lr
 | |
| ENDPROC(__gnu_thumb1_case_shi)
 | |
| .popsection
 | |
| 
 | |
| .pushsection .text.__gnu_thumb1_case_uhi, "ax"	@ NOTE(review): looks like a switch-table helper, r0 = index - confirm
 | |
| ENTRY(__gnu_thumb1_case_uhi)
 | |
| 	push	{r0, r1}		@ both are modified here and restored below
 | |
| 	mov	r1, lr
 | |
| 	lsrs	r1, r1, #1
 | |
| 	lsls	r0, r0, #1		@ r0 * 2: byte offset of a halfword entry
 | |
| 	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
 | |
| 	ldrh	r1, [r1, r0]		@ unsigned halfword found at r1 + r0
 | |
| 	lsls	r1, r1, #1		@ entry * 2
 | |
| 	add	lr, lr, r1		@ move the return address by that amount
 | |
| 	pop	{r0, r1}
 | |
| 	ret	lr
 | |
| ENDPROC(__gnu_thumb1_case_uhi)
 | |
| .popsection
 | |
| #endif
 |