arch/blackfin/lib/udivdi3.S - linux/kernel/git/davem/net - Git at Google

 /*
  * udivdi3.S - unsigned long long division
  *
  * Copyright 2003-2007 Analog Devices Inc.
  * Enter bugs at http://blackfin.uclinux.org/
  *
  * Licensed under the GPLv2 or later.
  */

 #include <linux/linkage.h>

 #define CARRY AC0

 #ifdef CONFIG_ARITHMETIC_OPS_L1
 .section .l1.text
 #else
 .text
 #endif


 ENTRY(___udivdi3)
    R3 = [SP + 12];
    [--SP] = (R7:4, P5:3);

    /* Attempt to use divide primitive first; these will handle
    **  most cases, and they're quick - avoids stalls incurred by
    ** testing for identities.
    */

    R4 = R2 | R3;
    CC = R4 == 0;
    IF CC JUMP .LDIV_BY_ZERO;

    R4.H = 0x8000;
    R4 >>>= 16;                  // R4 now 0xFFFF8000
    R5 = R0 | R2;                // If either dividend or
    R4 = R5 & R4;                // divisor have bits in
    CC = R4;                     // top half or low half's sign
    IF CC JUMP .LIDENTS;          // bit, skip builtins.
    R4 = R1 | R3;                // Also check top halves
    CC = R4;
    IF CC JUMP .LIDENTS;

    /* Can use the builtins. */

    AQ = CC;                     // Clear AQ (CC==0)
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    DIVQ(R0, R2);
    R0 = R0.L (Z);
    R1 = 0;
    (R7:4, P5:3) = [SP++];
    RTS;

 .LIDENTS:
    /* Test for common identities. Value to be returned is
    ** placed in R6,R7.
    */
                                 // Check for 0/y, return 0
    R4 = R0 | R1;
    CC = R4 == 0;
    IF CC JUMP .LRETURN_R0;

                                 // Check for x/x, return 1
    R6 = R0 - R2;                // If x == y, then both R6 and R7 will be zero
    R7 = R1 - R3;
    R4 = R6 | R7;                // making R4 zero.
    R6 += 1;                     // which would now make R6:R7==1.
    CC = R4 == 0;
    IF CC JUMP .LRETURN_IDENT;

                                 // Check for x/1, return x
    R6 = R0;
    R7 = R1;
    CC = R3 == 0;
    IF !CC JUMP .Lnexttest;
    CC = R2 == 1;
    IF CC JUMP .LRETURN_IDENT;

 .Lnexttest:
    R4.L = ONES R2;              // check for div by power of two which
    R5.L = ONES R3;              // can be done using a shift
    R6 = PACK (R5.L, R4.L);
    CC = R6 == 1;
    IF CC JUMP .Lpower_of_two_upper_zero;
    R6 = PACK (R4.L, R5.L);
    CC = R6 == 1;
    IF CC JUMP .Lpower_of_two_lower_zero;

                                 // Check for x < y, return 0
    R6 = 0;
    R7 = R6;
    CC = R1 < R3 (IU);
    IF CC JUMP .LRETURN_IDENT;
    CC = R1 == R3;
    IF !CC JUMP .Lno_idents;
    CC = R0 < R2 (IU);
    IF CC JUMP .LRETURN_IDENT;

 .Lno_idents:                    // Idents don't match. Go for the full operation


    // If X, or X and Y have high bit set, it'll affect the
    // results, so shift right one to stop this. Note: we've already
    // checked that X >= Y, so Y's msb won't be set unless X's
    // is.

    R4 = 0;
    CC = R1 < 0;
    IF !CC JUMP .Lx_msb_clear;
    CC = !CC;                   // 1 -> 0;
    R1 = ROT R1 BY -1;          // Shift X >> 1
    R0 = ROT R0 BY -1;          // lsb -> CC
    BITSET(R4,31);              // to record only x msb was set
    CC = R3 < 0;
    IF !CC JUMP .Ly_msb_clear;
    CC = !CC;
    R3 = ROT R3 BY -1;          // Shift Y >> 1
    R2 = ROT R2 BY -1;
    BITCLR(R4,31);              // clear bit to record only x msb was set

 .Ly_msb_clear:
 .Lx_msb_clear:
    // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
    // were lost, so we should shift result left by one.

    [--SP] = R4;                // save for later

    // In the loop that follows, each iteration we add
    // either Y' or -Y' to the Remainder. We compute the
    // negated Y', and store, for convenience. Y' goes
    // into P0:P1, while -Y' goes into P2:P3.

    P0 = R2;
    P1 = R3;
    R2 = -R2;
    CC = CARRY;
    CC = !CC;
    R4 = CC;
    R3 = -R3;
    R3 = R3 - R4;

    R6 = 0;                     // remainder = 0
    R7 = R6;

    [--SP] = R2; P2 = SP;
    [--SP] = R3; P3 = SP;
    [--SP] = R6; P5 = SP;       // AQ = 0
    [--SP] = P1;

    /* In the loop that follows, we use the following
    ** register assignments:
    ** R0,R1 X, workspace
    ** R2,R3 Y, workspace
    ** R4,R5 partial Div
    ** R6,R7 partial remainder
    ** P5 AQ
    ** The remainder and div form a 128-bit number, with
    ** the remainder in the high 64-bits.
    */
    R4 = R0;                    // Div = X'
    R5 = R1;
    R3 = 0;

    P4 = 64;                    // Iterate once per bit
    LSETUP(.LULST,.LULEND) LC0 = P4;
 .LULST:
         /* Shift Div and remainder up by one. The bit shifted
         ** out of the top of the quotient is shifted into the bottom
         ** of the remainder.
         */
         CC = R3;
         R4 = ROT R4 BY 1;
         R5 = ROT R5 BY 1 ||        // low q to high q
              R2 = [P5];            // load saved AQ
         R6 = ROT R6 BY 1 ||        // high q to low r
              R0 = [P2];            // load -Y'
         R7 = ROT R7 BY 1 ||        // low r to high r
              R1 = [P3];

                                    // Assume add -Y'
         CC = R2 < 0;               // But if AQ is set...
         IF CC R0 = P0;             // then add Y' instead
         IF CC R1 = P1;

         R6 = R6 + R0;              // Rem += (Y' or -Y')
         CC = CARRY;
         R0 = CC;
         R7 = R7 + R1;
         R7 = R7 + R0 (NS) ||
              R1 = [SP];
                                    // Set the next AQ bit
         R1 = R7 ^ R1;              // from Remainder and Y'
         R1 = R1 >> 31 ||           // Negate AQ's value, and
              [P5] = R1;            // save next AQ
         BITTGL(R1, 0);             // add neg AQ  to the Div
 .LULEND: R4 = R4 + R1;

    R6 = [SP + 16];

    R0 = R4;
    R1 = R5;
    CC = BITTST(R6,30);         // Just set CC=0
    R4 = ROT R0 BY 1;           // but if we had to shift X,
    R5 = ROT R1 BY 1;           // and didn't shift any bits out,
    CC = BITTST(R6,31);         // then the result will be half as
    IF CC R0 = R4;              // much as required, so shift left
    IF CC R1 = R5;              // one space.

    SP += 20;
    (R7:4, P5:3) = [SP++];
    RTS;

 .Lpower_of_two:
    /* Y has a single bit set, which means it's a power of two.
    ** That means we can perform the division just by shifting
    ** X to the right the appropriate number of bits
    */

    /* signbits returns the number of sign bits, minus one.
    ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
    ** to shift right n-signbits spaces. It also means 0x80000000
    ** is a special case, because that *also* gives a signbits of 0
    */
 .Lpower_of_two_lower_zero:
    R7 = 0;
    R6 = R1 >> 31;
    CC = R3 < 0;
    IF CC JUMP .LRETURN_IDENT;

    R2.L = SIGNBITS R3;
    R2 = R2.L (Z);
    R2 += -62;
    (R7:4, P5:3) = [SP++];
    JUMP ___lshftli;

 .Lpower_of_two_upper_zero:
    CC = R2 < 0;
    IF CC JUMP .Lmaxint_shift;

    R2.L = SIGNBITS R2;
    R2 = R2.L (Z);
    R2 += -30;
    (R7:4, P5:3) = [SP++];
    JUMP ___lshftli;

 .Lmaxint_shift:
    R2 = -31;
    (R7:4, P5:3) = [SP++];
    JUMP ___lshftli;

 .LRETURN_IDENT:
    R0 = R6;
    R1 = R7;
 .LRETURN_R0:
    (R7:4, P5:3) = [SP++];
    RTS;
 .LDIV_BY_ZERO:
    R0 = ~R2;
    R1 = R0;
    (R7:4, P5:3) = [SP++];
    RTS;

 ENDPROC(___udivdi3)


 ENTRY(___lshftli)
 	CC = R2 == 0;
 	IF CC JUMP .Lfinished;	// nothing to do
 	CC = R2 < 0;
 	IF CC JUMP .Lrshift;
 	R3 = 64;
 	CC = R2 < R3;
 	IF !CC JUMP .Lretzero;

 	// We're shifting left, and it's less than 64 bits, so
 	// a valid result will be returned.

 	R3 >>= 1;	// R3 now 32
 	CC = R2 < R3;

 	IF !CC JUMP .Lzerohalf;

 	// We're shifting left, between 1 and 31 bits, which means
 	// some of the low half will be shifted into the high half.
 	// Work out how much.

 	R3 = R3 - R2;

 	// Save that much data from the bottom half.

 	P1 = R7;
 	R7 = R0;
 	R7 >>= R3;

 	// Adjust both parts of the parameter.

 	R0 <<= R2;
 	R1 <<= R2;

 	// And include the bits moved across.

 	R1 = R1 | R7;
 	R7 = P1;
 	RTS;

 .Lzerohalf:
 	// We're shifting left, between 32 and 63 bits, so the
 	// bottom half will become zero, and the top half will
 	// lose some bits. How many?

 	R2 = R2 - R3;	// N - 32
 	R1 = LSHIFT R0 BY R2.L;
 	R0 = R0 - R0;
 	RTS;

 .Lretzero:
 	R0 = R0 - R0;
 	R1 = R0;
 .Lfinished:
 	RTS;

 .Lrshift:
 	// We're shifting right, but by how much?
 	R2 = -R2;
 	R3 = 64;
 	CC = R2 < R3;
 	IF !CC JUMP .Lretzero;

 	// Shifting right less than 64 bits, so some result bits will
 	// be retained.

 	R3 >>= 1;	// R3 now 32
 	CC = R2 < R3;
 	IF !CC JUMP .Lsignhalf;

 	// Shifting right between 1 and 31 bits, so need to copy
 	// data across words.

 	P1 = R7;
 	R3 = R3 - R2;
 	R7 = R1;
 	R7 <<= R3;
 	R1 >>= R2;
 	R0 >>= R2;
 	R0 = R7 | R0;
 	R7 = P1;
 	RTS;

 .Lsignhalf:
 	// Shifting right between 32 and 63 bits, so the top half
 	// will become all zero-bits, and the bottom half is some
 	// of the top half. But how much?

 	R2 = R2 - R3;
 	R0 = R1;
 	R0 >>= R2;
 	R1 = 0;
 	RTS;

 ENDPROC(___lshftli)
	/*
	* udivdi3.S - unsigned long long division
	*
	* Copyright 2003-2007 Analog Devices Inc.
	* Enter bugs at http://blackfin.uclinux.org/
	*
	* Licensed under the GPLv2 or later.
	*/

	#include <linux/linkage.h>

	#define CARRY AC0

	#ifdef CONFIG_ARITHMETIC_OPS_L1
	.section .l1.text
	#else
	.text
	#endif


	ENTRY(___udivdi3)
	R3 = [SP + 12];
	[--SP] = (R7:4, P5:3);

	/* Attempt to use divide primitive first; these will handle
	** most cases, and they're quick - avoids stalls incurred by
	** testing for identities.
	*/

	R4 = R2 \| R3;
	CC = R4 == 0;
	IF CC JUMP .LDIV_BY_ZERO;

	R4.H = 0x8000;
	R4 >>>= 16; // R4 now 0xFFFF8000
	R5 = R0 \| R2; // If either dividend or
	R4 = R5 & R4; // divisor have bits in
	CC = R4; // top half or low half's sign
	IF CC JUMP .LIDENTS; // bit, skip builtins.
	R4 = R1 \| R3; // Also check top halves
	CC = R4;
	IF CC JUMP .LIDENTS;

	/* Can use the builtins. */

	AQ = CC; // Clear AQ (CC==0)
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	DIVQ(R0, R2);
	R0 = R0.L (Z);
	R1 = 0;
	(R7:4, P5:3) = [SP++];
	RTS;

	.LIDENTS:
	/* Test for common identities. Value to be returned is
	** placed in R6,R7.
	*/
	// Check for 0/y, return 0
	R4 = R0 \| R1;
	CC = R4 == 0;
	IF CC JUMP .LRETURN_R0;

	// Check for x/x, return 1
	R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero
	R7 = R1 - R3;
	R4 = R6 \| R7; // making R4 zero.
	R6 += 1; // which would now make R6:R7==1.
	CC = R4 == 0;
	IF CC JUMP .LRETURN_IDENT;

	// Check for x/1, return x
	R6 = R0;
	R7 = R1;
	CC = R3 == 0;
	IF !CC JUMP .Lnexttest;
	CC = R2 == 1;
	IF CC JUMP .LRETURN_IDENT;

	.Lnexttest:
	R4.L = ONES R2; // check for div by power of two which
	R5.L = ONES R3; // can be done using a shift
	R6 = PACK (R5.L, R4.L);
	CC = R6 == 1;
	IF CC JUMP .Lpower_of_two_upper_zero;
	R6 = PACK (R4.L, R5.L);
	CC = R6 == 1;
	IF CC JUMP .Lpower_of_two_lower_zero;

	// Check for x < y, return 0
	R6 = 0;
	R7 = R6;
	CC = R1 < R3 (IU);
	IF CC JUMP .LRETURN_IDENT;
	CC = R1 == R3;
	IF !CC JUMP .Lno_idents;
	CC = R0 < R2 (IU);
	IF CC JUMP .LRETURN_IDENT;

	.Lno_idents: // Idents don't match. Go for the full operation


	// If X, or X and Y have high bit set, it'll affect the
	// results, so shift right one to stop this. Note: we've already
	// checked that X >= Y, so Y's msb won't be set unless X's
	// is.

	R4 = 0;
	CC = R1 < 0;
	IF !CC JUMP .Lx_msb_clear;
	CC = !CC; // 1 -> 0;
	R1 = ROT R1 BY -1; // Shift X >> 1
	R0 = ROT R0 BY -1; // lsb -> CC
	BITSET(R4,31); // to record only x msb was set
	CC = R3 < 0;
	IF !CC JUMP .Ly_msb_clear;
	CC = !CC;
	R3 = ROT R3 BY -1; // Shift Y >> 1
	R2 = ROT R2 BY -1;
	BITCLR(R4,31); // clear bit to record only x msb was set

	.Ly_msb_clear:
	.Lx_msb_clear:
	// Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
	// were lost, so we should shift result left by one.

	[--SP] = R4; // save for later

	// In the loop that follows, each iteration we add
	// either Y' or -Y' to the Remainder. We compute the
	// negated Y', and store, for convenience. Y' goes
	// into P0:P1, while -Y' goes into P2:P3.

	P0 = R2;
	P1 = R3;
	R2 = -R2;
	CC = CARRY;
	CC = !CC;
	R4 = CC;
	R3 = -R3;
	R3 = R3 - R4;

	R6 = 0; // remainder = 0
	R7 = R6;

	[--SP] = R2; P2 = SP;
	[--SP] = R3; P3 = SP;
	[--SP] = R6; P5 = SP; // AQ = 0
	[--SP] = P1;

	/* In the loop that follows, we use the following
	** register assignments:
	** R0,R1 X, workspace
	** R2,R3 Y, workspace
	** R4,R5 partial Div
	** R6,R7 partial remainder
	** P5 AQ
	** The remainder and div form a 128-bit number, with
	** the remainder in the high 64-bits.
	*/
	R4 = R0; // Div = X'
	R5 = R1;
	R3 = 0;

	P4 = 64; // Iterate once per bit
	LSETUP(.LULST,.LULEND) LC0 = P4;
	.LULST:
	/* Shift Div and remainder up by one. The bit shifted
	** out of the top of the quotient is shifted into the bottom
	** of the remainder.
	*/
	CC = R3;
	R4 = ROT R4 BY 1;
	R5 = ROT R5 BY 1 \|\| // low q to high q
	R2 = [P5]; // load saved AQ
	R6 = ROT R6 BY 1 \|\| // high q to low r
	R0 = [P2]; // load -Y'
	R7 = ROT R7 BY 1 \|\| // low r to high r
	R1 = [P3];

	// Assume add -Y'
	CC = R2 < 0; // But if AQ is set...
	IF CC R0 = P0; // then add Y' instead
	IF CC R1 = P1;

	R6 = R6 + R0; // Rem += (Y' or -Y')
	CC = CARRY;
	R0 = CC;
	R7 = R7 + R1;
	R7 = R7 + R0 (NS) \|\|
	R1 = [SP];
	// Set the next AQ bit
	R1 = R7 ^ R1; // from Remainder and Y'
	R1 = R1 >> 31 \|\| // Negate AQ's value, and
	[P5] = R1; // save next AQ
	BITTGL(R1, 0); // add neg AQ to the Div
	.LULEND: R4 = R4 + R1;

	R6 = [SP + 16];

	R0 = R4;
	R1 = R5;
	CC = BITTST(R6,30); // Just set CC=0
	R4 = ROT R0 BY 1; // but if we had to shift X,
	R5 = ROT R1 BY 1; // and didn't shift any bits out,
	CC = BITTST(R6,31); // then the result will be half as
	IF CC R0 = R4; // much as required, so shift left
	IF CC R1 = R5; // one space.

	SP += 20;
	(R7:4, P5:3) = [SP++];
	RTS;

	.Lpower_of_two:
	/* Y has a single bit set, which means it's a power of two.
	** That means we can perform the division just by shifting
	** X to the right the appropriate number of bits
	*/

	/* signbits returns the number of sign bits, minus one.
	** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
	** to shift right n-signbits spaces. It also means 0x80000000
	** is a special case, because that also gives a signbits of 0
	*/
	.Lpower_of_two_lower_zero:
	R7 = 0;
	R6 = R1 >> 31;
	CC = R3 < 0;
	IF CC JUMP .LRETURN_IDENT;

	R2.L = SIGNBITS R3;
	R2 = R2.L (Z);
	R2 += -62;
	(R7:4, P5:3) = [SP++];
	JUMP ___lshftli;

	.Lpower_of_two_upper_zero:
	CC = R2 < 0;
	IF CC JUMP .Lmaxint_shift;

	R2.L = SIGNBITS R2;
	R2 = R2.L (Z);
	R2 += -30;
	(R7:4, P5:3) = [SP++];
	JUMP ___lshftli;

	.Lmaxint_shift:
	R2 = -31;
	(R7:4, P5:3) = [SP++];
	JUMP ___lshftli;

	.LRETURN_IDENT:
	R0 = R6;
	R1 = R7;
	.LRETURN_R0:
	(R7:4, P5:3) = [SP++];
	RTS;
	.LDIV_BY_ZERO:
	R0 = ~R2;
	R1 = R0;
	(R7:4, P5:3) = [SP++];
	RTS;

	ENDPROC(___udivdi3)


	ENTRY(___lshftli)
	CC = R2 == 0;
	IF CC JUMP .Lfinished; // nothing to do
	CC = R2 < 0;
	IF CC JUMP .Lrshift;
	R3 = 64;
	CC = R2 < R3;
	IF !CC JUMP .Lretzero;

	// We're shifting left, and it's less than 64 bits, so
	// a valid result will be returned.

	R3 >>= 1; // R3 now 32
	CC = R2 < R3;

	IF !CC JUMP .Lzerohalf;

	// We're shifting left, between 1 and 31 bits, which means
	// some of the low half will be shifted into the high half.
	// Work out how much.

	R3 = R3 - R2;

	// Save that much data from the bottom half.

	P1 = R7;
	R7 = R0;
	R7 >>= R3;

	// Adjust both parts of the parameter.

	R0 <<= R2;
	R1 <<= R2;

	// And include the bits moved across.

	R1 = R1 \| R7;
	R7 = P1;
	RTS;

	.Lzerohalf:
	// We're shifting left, between 32 and 63 bits, so the
	// bottom half will become zero, and the top half will
	// lose some bits. How many?

	R2 = R2 - R3; // N - 32
	R1 = LSHIFT R0 BY R2.L;
	R0 = R0 - R0;
	RTS;

	.Lretzero:
	R0 = R0 - R0;
	R1 = R0;
	.Lfinished:
	RTS;

	.Lrshift:
	// We're shifting right, but by how much?
	R2 = -R2;
	R3 = 64;
	CC = R2 < R3;
	IF !CC JUMP .Lretzero;

	// Shifting right less than 64 bits, so some result bits will
	// be retained.

	R3 >>= 1; // R3 now 32
	CC = R2 < R3;
	IF !CC JUMP .Lsignhalf;

	// Shifting right between 1 and 31 bits, so need to copy
	// data across words.

	P1 = R7;
	R3 = R3 - R2;
	R7 = R1;
	R7 <<= R3;
	R1 >>= R2;
	R0 >>= R2;
	R0 = R7 \| R0;
	R7 = P1;
	RTS;

	.Lsignhalf:
	// Shifting right between 32 and 63 bits, so the top half
	// will become all zero-bits, and the bottom half is some
	// of the top half. But how much?

	R2 = R2 - R3;
	R0 = R1;
	R0 >>= R2;
	R1 = 0;
	RTS;

	ENDPROC(___lshftli)