|  | /* SPDX-License-Identifier: GPL-2.0-only */ | 
|  | /* | 
|  | * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) | 
|  | */ | 
|  |  | 
|  | #include <linux/linkage.h> | 
|  |  | 
;;; Endian-dependent helpers for the unaligned-source paths of memcpy.
;;; Every helper takes: destination register, source register, shift amount.
;;;   SHIFT_1 / SHIFT_2: the two opposite shifts used inside the copy loops
;;;       to splice the bytes carried over from the previous word onto the
;;;       newly loaded one; their directions swap between the byte orders.
;;;   MERGE_1 / MERGE_2: position the halfword and the byte that are read
;;;       ahead in the off-by-1 case. MERGE_2 expands to nothing on
;;;       little endian.
;;;   EXTRACT_1 / EXTRACT_2: pull the final halfword and byte out of the
;;;       carry register in the off-by-1 case. The little-endian EXTRACT_1
;;;       (fixed 0xFFFF mask) and the big-endian EXTRACT_2 (fixed shift of 8)
;;;       ignore their IMM argument.
|  | #ifdef __LITTLE_ENDIAN__ | 
|  | # define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
|  | # define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >> | 
|  | # define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM | 
|  | # define MERGE_2(RX,RY,IMM) | 
|  | # define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF | 
|  | # define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM | 
|  | #else | 
|  | # define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >> | 
|  | # define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
|  | # define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
|  | # define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; << | 
|  | # define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM | 
|  | # define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08 | 
|  | #endif | 
|  |  | 
;;; Width of the bulk copy used when source and destination are both aligned.
;;;   LOADX / STOREX: one load or store with address write-back; it advances
;;;       the pointer register by 8 bytes in the LL64 build, by 4 otherwise.
;;;   ZOLSHFT: log2 of the bytes moved by one iteration of the bulk loop,
;;;       which issues four LOADX and four STOREX (32 or 16 bytes).
;;;   ZOLAND: mask giving the bytes left over after the bulk loop.
;;; The three values must stay consistent with each other.
|  | #ifdef CONFIG_ARC_HAS_LL64 | 
|  | # define LOADX(DST,RX)		ldd.ab	DST, [RX, 8] | 
|  | # define STOREX(SRC,RX)		std.ab	SRC, [RX, 8] | 
|  | # define ZOLSHFT		5 | 
|  | # define ZOLAND			0x1F | 
|  | #else | 
|  | # define LOADX(DST,RX)		ld.ab	DST, [RX, 4] | 
|  | # define STOREX(SRC,RX)		st.ab	SRC, [RX, 4] | 
|  | # define ZOLSHFT		4 | 
|  | # define ZOLAND			0xF | 
|  | #endif | 
|  |  | 
;;; memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
;;;
;;; Register use, as visible in the code below:
;;;   r0        destination pointer; never written here, so it is intact
;;;             on return
;;;   r1        source pointer, advanced by every load
;;;   r2        number of bytes still to be copied
;;;   r3        working copy of the destination, advanced by every store
;;;   r4-r10    scratch: r4 first holds alignment bits, later copy data
;;;             NOTE(review): the 64bit LOADX variant presumably also
;;;             writes the odd partner of each register - confirm in the
;;;             ISA manual
;;;   lp_count  trip count of the hardware loops started with lpnz
;;;
;;; Strategy: byte-copy until the destination is 32bit aligned, then run
;;; one of four bulk loops selected by the low two bits of the source
;;; address, and finish with a byte-copy of whatever is left.
;;;
;;; A branch or jump with the .d suffix is followed by a delay-slot
;;; instruction that runs whether or not the branch is taken. Several
;;; paths below depend on that, and on flags surviving across instructions
;;; without the .f suffix, so the instruction order must not be changed.
|  | ENTRY_CFI(memcpy) | 
;; Test len only: the destination operand is 0, so just the flags are kept.
|  | mov.f	0, r2 | 
|  | ;;; if size is zero | 
|  | jz.d	[blink] | 
;; Delay slot of the jz.d above: runs on both the return and the copy path.
|  | mov	r3, r0		; don;t clobber ret val | 
|  |  | 
|  | ;;; if size <= 8 | 
|  | cmp	r2, 8 | 
|  | bls.d	@.Lsmallchunk | 
;; Delay slot: lp_count = len for the byte loop at .Lsmallchunk. len is
;; nonzero here, so the flags it sets let the lpnz there enter the loop.
|  | mov.f	lp_count, r2 | 
|  |  | 
;; Align the destination: r4 = dst & 3 and, when that is nonzero, copy
;; 4 - r4 bytes one at a time. For an already aligned destination and.f
;; sets Z and lpnz skips the loop; rsub has no .f and keeps the flags.
|  | and.f	r4, r0, 0x03 | 
|  | rsub	lp_count, r4, 4 | 
|  | lpnz	@.Laligndestination | 
|  | ;; LOOP BEGIN | 
|  | ldb.ab	r5, [r1,1] | 
|  | sub	r2, r2, 1 | 
|  | stb.ab	r5, [r3,1] | 
|  | .Laligndestination: | 
|  |  | 
|  | ;;; Check the alignment of the source | 
|  | and.f	r4, r1, 0x03 | 
|  | bnz.d	@.Lsourceunaligned | 
|  |  | 
|  | ;;; CASE 0: Both source and destination are 32bit aligned | 
|  | ;;; Convert len to Dwords, unfold x4 | 
;; This lsr.f sits in the delay slot of the bnz.d above, so it also runs
;; on the unaligned path; that path recomputes lp_count and the flags.
|  | lsr.f	lp_count, r2, ZOLSHFT | 
|  | lpnz	@.Lcopy32_64bytes | 
|  | ;; LOOP START | 
;; Four loads followed by four stores per iteration: 32 bytes in the LL64
;; build, 16 bytes otherwise.
|  | LOADX (r6, r1) | 
|  | LOADX (r8, r1) | 
|  | LOADX (r10, r1) | 
|  | LOADX (r4, r1) | 
|  | STOREX (r6, r3) | 
|  | STOREX (r8, r3) | 
|  | STOREX (r10, r3) | 
|  | STOREX (r4, r3) | 
|  | .Lcopy32_64bytes: | 
|  |  | 
;; The count left over is len masked with ZOLAND: at most 31 bytes in the
;; LL64 build and at most 15 otherwise.
|  | and.f	lp_count, r2, ZOLAND ;Last remaining 31 bytes | 
|  | .Lsmallchunk: | 
;; Reached by falling through (count and flags from the and.f above) or
;; from the size <= 8 test (lp_count = len, which is nonzero).
|  | lpnz	@.Lcopyremainingbytes | 
|  | ;; LOOP START | 
|  | ldb.ab	r5, [r1,1] | 
|  | stb.ab	r5, [r3,1] | 
|  | .Lcopyremainingbytes: | 
|  |  | 
|  | j	[blink] | 
|  | ;;; END CASE 0 | 
|  |  | 
|  | .Lsourceunaligned: | 
;; r4 = src & 3, which is 1, 2 or 3 here. The cmp selects the case:
;; equal -> off by 2, higher -> off by 3, otherwise off by 1.
|  | cmp	r4, 2 | 
|  | beq.d	@.LunalignedOffby2 | 
;; Delay slot of beq.d: one byte is accounted for on all three paths.
;; No .f suffix, so the flags of the cmp are still valid for the bhi.d.
|  | sub	r2, r2, 1 | 
|  |  | 
|  | bhi.d	@.LunalignedOffby3 | 
;; Delay slot of bhi.d: the first source byte goes to r5 for both the
;; off-by-3 and the off-by-1 case.
|  | ldb.ab	r5, [r1, 1] | 
|  |  | 
|  | ;;; CASE 1: The source is unaligned, off by 1 | 
|  | ;; Hence I need to read 1 byte for a 16bit alignment | 
|  | ;; and 2bytes to reach 32bit alignment | 
|  | ldh.ab	r6, [r1, 2] | 
|  | sub	r2, r2, 2 | 
|  | ;; Convert to words, unfold x2 | 
|  | lsr.f	lp_count, r2, 3 | 
;; Build the carry: r5 ends up holding the three bytes read ahead of the
;; aligned words (the byte from the delay slot plus the halfword in r6).
|  | MERGE_1 (r6, r6, 8) | 
|  | MERGE_2 (r5, r5, 24) | 
|  | or	r5, r5, r6 | 
|  |  | 
|  | ;; Both src and dst are aligned | 
|  | lpnz	@.Lcopy8bytes_1 | 
|  | ;; LOOP START | 
;; Per iteration: load two aligned words, store two words. Each stored
;; word is the carry in r5 combined with one byte of the newly loaded
;; word; the other three bytes of that word become the next carry.
|  | ld.ab	r6, [r1, 4] | 
|  | ld.ab	r8, [r1,4] | 
|  |  | 
|  | SHIFT_1	(r7, r6, 24) | 
|  | or	r7, r7, r5 | 
|  | SHIFT_2	(r5, r6, 8) | 
|  |  | 
|  | SHIFT_1	(r9, r8, 24) | 
|  | or	r9, r9, r5 | 
|  | SHIFT_2	(r5, r8, 8) | 
|  |  | 
|  | st.ab	r7, [r3, 4] | 
|  | st.ab	r9, [r3, 4] | 
|  | .Lcopy8bytes_1: | 
|  |  | 
;; r5 still holds three bytes that were read but not yet stored.
|  | ;; Write back the remaining 16bits | 
|  | EXTRACT_1 (r6, r5, 16) | 
|  | sth.ab	r6, [r3, 2] | 
|  | ;; Write back the remaining 8bits | 
|  | EXTRACT_2 (r5, r5, 16) | 
|  | stb.ab	r5, [r3, 1] | 
|  |  | 
|  | and.f	lp_count, r2, 0x07 ;Last 8bytes | 
|  | lpnz	@.Lcopybytewise_1 | 
|  | ;; LOOP START | 
|  | ldb.ab	r6, [r1,1] | 
|  | stb.ab	r6, [r3,1] | 
|  | .Lcopybytewise_1: | 
|  | j	[blink] | 
|  |  | 
|  | .LunalignedOffby2: | 
|  | ;;; CASE 2: The source is unaligned, off by 2 | 
;; Read the halfword that brings the source to 32bit alignment; r5 is the
;; carry. This sub accounts for its second byte, the first one was already
;; subtracted in the delay slot of the beq.d.
|  | ldh.ab	r5, [r1, 2] | 
|  | sub	r2, r2, 1 | 
|  |  | 
|  | ;; Both src and dst are aligned | 
|  | ;; Convert to words, unfold x2 | 
|  | lsr.f	lp_count, r2, 3 | 
;; Big endian only: move the carried halfword to the upper half so that
;; it matches the shifts in the loop. Conditional on the flags of the
;; lsr.f above, i.e. done only when the loop will run at least once.
|  | #ifdef __BIG_ENDIAN__ | 
|  | asl.nz	r5, r5, 16 | 
|  | #endif | 
|  | lpnz	@.Lcopy8bytes_2 | 
|  | ;; LOOP START | 
;; Same scheme as in case 1, with a two byte carry in r5.
|  | ld.ab	r6, [r1, 4] | 
|  | ld.ab	r8, [r1,4] | 
|  |  | 
|  | SHIFT_1	(r7, r6, 16) | 
|  | or	r7, r7, r5 | 
|  | SHIFT_2	(r5, r6, 16) | 
|  |  | 
|  | SHIFT_1	(r9, r8, 16) | 
|  | or	r9, r9, r5 | 
|  | SHIFT_2	(r5, r8, 16) | 
|  |  | 
|  | st.ab	r7, [r3, 4] | 
|  | st.ab	r9, [r3, 4] | 
|  | .Lcopy8bytes_2: | 
|  |  | 
;; Big endian only: bring the carried halfword back down before storing it.
;; Nothing inside the loop sets flags, so this tests the same lsr.f result
;; as the asl.nz before the loop.
|  | #ifdef __BIG_ENDIAN__ | 
|  | lsr.nz	r5, r5, 16 | 
|  | #endif | 
;; Store the two carried bytes that have not been written yet.
|  | sth.ab	r5, [r3, 2] | 
|  |  | 
|  | and.f	lp_count, r2, 0x07 ;Last 8bytes | 
|  | lpnz	@.Lcopybytewise_2 | 
|  | ;; LOOP START | 
|  | ldb.ab	r6, [r1,1] | 
|  | stb.ab	r6, [r3,1] | 
|  | .Lcopybytewise_2: | 
|  | j	[blink] | 
|  |  | 
|  | .LunalignedOffby3: | 
|  | ;;; CASE 3: The source is unaligned, off by 3 | 
|  | ;;; Hence, I need to read 1byte for achieve the 32bit alignment | 
;; That byte is already in r5 (loaded in the delay slot of the bhi.d) and
;; already subtracted from r2 (delay slot of the beq.d).
|  |  | 
|  | ;; Both src and dst are aligned | 
|  | ;; Convert to words, unfold x2 | 
|  | lsr.f	lp_count, r2, 3 | 
;; Big endian only: move the carried byte to the top byte, and only when
;; the loop will run at least once (flags of the lsr.f above).
|  | #ifdef __BIG_ENDIAN__ | 
|  | asl.ne	r5, r5, 24 | 
|  | #endif | 
|  | lpnz	@.Lcopy8bytes_3 | 
|  | ;; LOOP START | 
;; Same scheme as in case 1, with a one byte carry in r5.
|  | ld.ab	r6, [r1, 4] | 
|  | ld.ab	r8, [r1,4] | 
|  |  | 
|  | SHIFT_1	(r7, r6, 8) | 
|  | or	r7, r7, r5 | 
|  | SHIFT_2	(r5, r6, 24) | 
|  |  | 
|  | SHIFT_1	(r9, r8, 8) | 
|  | or	r9, r9, r5 | 
|  | SHIFT_2	(r5, r8, 24) | 
|  |  | 
|  | st.ab	r7, [r3, 4] | 
|  | st.ab	r9, [r3, 4] | 
|  | .Lcopy8bytes_3: | 
|  |  | 
;; Big endian only: bring the carried byte back down before storing it;
;; the flags are still those of the lsr.f before the loop.
|  | #ifdef __BIG_ENDIAN__ | 
|  | lsr.nz	r5, r5, 24 | 
|  | #endif | 
;; Store the one carried byte that has not been written yet.
|  | stb.ab	r5, [r3, 1] | 
|  |  | 
|  | and.f	lp_count, r2, 0x07 ;Last 8bytes | 
|  | lpnz	@.Lcopybytewise_3 | 
|  | ;; LOOP START | 
|  | ldb.ab	r6, [r1,1] | 
|  | stb.ab	r6, [r3,1] | 
|  | .Lcopybytewise_3: | 
|  | j	[blink] | 
|  |  | 
|  | END_CFI(memcpy) | 