linux / linux / kernel / git / davem / net / 7a82b63f19b0a05a76271aee1eb7905cd1c7d5ad / . / arch / blackfin / lib / udivdi3.S

/* | |

* udivdi3.S - unsigned long long division | |

* | |

* Copyright 2003-2007 Analog Devices Inc. | |

* Enter bugs at http://blackfin.uclinux.org/ | |

* | |

* Licensed under the GPLv2 or later. | |

*/ | |

#include <linux/linkage.h> | |

#define CARRY AC0 | |

#ifdef CONFIG_ARITHMETIC_OPS_L1 | |

.section .l1.text | |

#else | |

.text | |

#endif | |

ENTRY(___udivdi3) | |

R3 = [SP + 12]; | |

[--SP] = (R7:4, P5:3); | |

/* Attempt to use divide primitive first; these will handle | |

** most cases, and they're quick - avoids stalls incurred by | |

** testing for identities. | |

*/ | |

R4 = R2 | R3; | |

CC = R4 == 0; | |

IF CC JUMP .LDIV_BY_ZERO; | |

R4.H = 0x8000; | |

R4 >>>= 16; // R4 now 0xFFFF8000 | |

R5 = R0 | R2; // If either dividend or | |

R4 = R5 & R4; // divisor have bits in | |

CC = R4; // top half or low half's sign | |

IF CC JUMP .LIDENTS; // bit, skip builtins. | |

R4 = R1 | R3; // Also check top halves | |

CC = R4; | |

IF CC JUMP .LIDENTS; | |

/* Can use the builtins. */ | |

AQ = CC; // Clear AQ (CC==0) | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

DIVQ(R0, R2); | |

R0 = R0.L (Z); | |

R1 = 0; | |

(R7:4, P5:3) = [SP++]; | |

RTS; | |

.LIDENTS: | |

/* Test for common identities. Value to be returned is | |

** placed in R6,R7. | |

*/ | |

// Check for 0/y, return 0 | |

R4 = R0 | R1; | |

CC = R4 == 0; | |

IF CC JUMP .LRETURN_R0; | |

// Check for x/x, return 1 | |

R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero | |

R7 = R1 - R3; | |

R4 = R6 | R7; // making R4 zero. | |

R6 += 1; // which would now make R6:R7==1. | |

CC = R4 == 0; | |

IF CC JUMP .LRETURN_IDENT; | |

// Check for x/1, return x | |

R6 = R0; | |

R7 = R1; | |

CC = R3 == 0; | |

IF !CC JUMP .Lnexttest; | |

CC = R2 == 1; | |

IF CC JUMP .LRETURN_IDENT; | |

.Lnexttest: | |

R4.L = ONES R2; // check for div by power of two which | |

R5.L = ONES R3; // can be done using a shift | |

R6 = PACK (R5.L, R4.L); | |

CC = R6 == 1; | |

IF CC JUMP .Lpower_of_two_upper_zero; | |

R6 = PACK (R4.L, R5.L); | |

CC = R6 == 1; | |

IF CC JUMP .Lpower_of_two_lower_zero; | |

// Check for x < y, return 0 | |

R6 = 0; | |

R7 = R6; | |

CC = R1 < R3 (IU); | |

IF CC JUMP .LRETURN_IDENT; | |

CC = R1 == R3; | |

IF !CC JUMP .Lno_idents; | |

CC = R0 < R2 (IU); | |

IF CC JUMP .LRETURN_IDENT; | |

.Lno_idents: // Idents don't match. Go for the full operation | |

// If X, or X and Y have high bit set, it'll affect the | |

// results, so shift right one to stop this. Note: we've already | |

// checked that X >= Y, so Y's msb won't be set unless X's | |

// is. | |

R4 = 0; | |

CC = R1 < 0; | |

IF !CC JUMP .Lx_msb_clear; | |

CC = !CC; // 1 -> 0; | |

R1 = ROT R1 BY -1; // Shift X >> 1 | |

R0 = ROT R0 BY -1; // lsb -> CC | |

BITSET(R4,31); // to record only x msb was set | |

CC = R3 < 0; | |

IF !CC JUMP .Ly_msb_clear; | |

CC = !CC; | |

R3 = ROT R3 BY -1; // Shift Y >> 1 | |

R2 = ROT R2 BY -1; | |

BITCLR(R4,31); // clear bit to record only x msb was set | |

.Ly_msb_clear: | |

.Lx_msb_clear: | |

// Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits | |

// were lost, so we should shift result left by one. | |

[--SP] = R4; // save for later | |

// In the loop that follows, each iteration we add | |

// either Y' or -Y' to the Remainder. We compute the | |

// negated Y', and store, for convenience. Y' goes | |

// into P0:P1, while -Y' goes into P2:P3. | |

P0 = R2; | |

P1 = R3; | |

R2 = -R2; | |

CC = CARRY; | |

CC = !CC; | |

R4 = CC; | |

R3 = -R3; | |

R3 = R3 - R4; | |

R6 = 0; // remainder = 0 | |

R7 = R6; | |

[--SP] = R2; P2 = SP; | |

[--SP] = R3; P3 = SP; | |

[--SP] = R6; P5 = SP; // AQ = 0 | |

[--SP] = P1; | |

/* In the loop that follows, we use the following | |

** register assignments: | |

** R0,R1 X, workspace | |

** R2,R3 Y, workspace | |

** R4,R5 partial Div | |

** R6,R7 partial remainder | |

** P5 AQ | |

** The remainder and div form a 128-bit number, with | |

** the remainder in the high 64-bits. | |

*/ | |

R4 = R0; // Div = X' | |

R5 = R1; | |

R3 = 0; | |

P4 = 64; // Iterate once per bit | |

LSETUP(.LULST,.LULEND) LC0 = P4; | |

.LULST: | |

/* Shift Div and remainder up by one. The bit shifted | |

** out of the top of the quotient is shifted into the bottom | |

** of the remainder. | |

*/ | |

CC = R3; | |

R4 = ROT R4 BY 1; | |

R5 = ROT R5 BY 1 || // low q to high q | |

R2 = [P5]; // load saved AQ | |

R6 = ROT R6 BY 1 || // high q to low r | |

R0 = [P2]; // load -Y' | |

R7 = ROT R7 BY 1 || // low r to high r | |

R1 = [P3]; | |

// Assume add -Y' | |

CC = R2 < 0; // But if AQ is set... | |

IF CC R0 = P0; // then add Y' instead | |

IF CC R1 = P1; | |

R6 = R6 + R0; // Rem += (Y' or -Y') | |

CC = CARRY; | |

R0 = CC; | |

R7 = R7 + R1; | |

R7 = R7 + R0 (NS) || | |

R1 = [SP]; | |

// Set the next AQ bit | |

R1 = R7 ^ R1; // from Remainder and Y' | |

R1 = R1 >> 31 || // Negate AQ's value, and | |

[P5] = R1; // save next AQ | |

BITTGL(R1, 0); // add neg AQ to the Div | |

.LULEND: R4 = R4 + R1; | |

R6 = [SP + 16]; | |

R0 = R4; | |

R1 = R5; | |

CC = BITTST(R6,30); // Just set CC=0 | |

R4 = ROT R0 BY 1; // but if we had to shift X, | |

R5 = ROT R1 BY 1; // and didn't shift any bits out, | |

CC = BITTST(R6,31); // then the result will be half as | |

IF CC R0 = R4; // much as required, so shift left | |

IF CC R1 = R5; // one space. | |

SP += 20; | |

(R7:4, P5:3) = [SP++]; | |

RTS; | |

.Lpower_of_two: | |

/* Y has a single bit set, which means it's a power of two. | |

** That means we can perform the division just by shifting | |

** X to the right the appropriate number of bits | |

*/ | |

/* signbits returns the number of sign bits, minus one. | |

** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need | |

** to shift right n-signbits spaces. It also means 0x80000000 | |

** is a special case, because that *also* gives a signbits of 0 | |

*/ | |

.Lpower_of_two_lower_zero: | |

R7 = 0; | |

R6 = R1 >> 31; | |

CC = R3 < 0; | |

IF CC JUMP .LRETURN_IDENT; | |

R2.L = SIGNBITS R3; | |

R2 = R2.L (Z); | |

R2 += -62; | |

(R7:4, P5:3) = [SP++]; | |

JUMP ___lshftli; | |

.Lpower_of_two_upper_zero: | |

CC = R2 < 0; | |

IF CC JUMP .Lmaxint_shift; | |

R2.L = SIGNBITS R2; | |

R2 = R2.L (Z); | |

R2 += -30; | |

(R7:4, P5:3) = [SP++]; | |

JUMP ___lshftli; | |

.Lmaxint_shift: | |

R2 = -31; | |

(R7:4, P5:3) = [SP++]; | |

JUMP ___lshftli; | |

.LRETURN_IDENT: | |

R0 = R6; | |

R1 = R7; | |

.LRETURN_R0: | |

(R7:4, P5:3) = [SP++]; | |

RTS; | |

.LDIV_BY_ZERO: | |

R0 = ~R2; | |

R1 = R0; | |

(R7:4, P5:3) = [SP++]; | |

RTS; | |

ENDPROC(___udivdi3) | |

ENTRY(___lshftli) | |

CC = R2 == 0; | |

IF CC JUMP .Lfinished; // nothing to do | |

CC = R2 < 0; | |

IF CC JUMP .Lrshift; | |

R3 = 64; | |

CC = R2 < R3; | |

IF !CC JUMP .Lretzero; | |

// We're shifting left, and it's less than 64 bits, so | |

// a valid result will be returned. | |

R3 >>= 1; // R3 now 32 | |

CC = R2 < R3; | |

IF !CC JUMP .Lzerohalf; | |

// We're shifting left, between 1 and 31 bits, which means | |

// some of the low half will be shifted into the high half. | |

// Work out how much. | |

R3 = R3 - R2; | |

// Save that much data from the bottom half. | |

P1 = R7; | |

R7 = R0; | |

R7 >>= R3; | |

// Adjust both parts of the parameter. | |

R0 <<= R2; | |

R1 <<= R2; | |

// And include the bits moved across. | |

R1 = R1 | R7; | |

R7 = P1; | |

RTS; | |

.Lzerohalf: | |

// We're shifting left, between 32 and 63 bits, so the | |

// bottom half will become zero, and the top half will | |

// lose some bits. How many? | |

R2 = R2 - R3; // N - 32 | |

R1 = LSHIFT R0 BY R2.L; | |

R0 = R0 - R0; | |

RTS; | |

.Lretzero: | |

R0 = R0 - R0; | |

R1 = R0; | |

.Lfinished: | |

RTS; | |

.Lrshift: | |

// We're shifting right, but by how much? | |

R2 = -R2; | |

R3 = 64; | |

CC = R2 < R3; | |

IF !CC JUMP .Lretzero; | |

// Shifting right less than 64 bits, so some result bits will | |

// be retained. | |

R3 >>= 1; // R3 now 32 | |

CC = R2 < R3; | |

IF !CC JUMP .Lsignhalf; | |

// Shifting right between 1 and 31 bits, so need to copy | |

// data across words. | |

P1 = R7; | |

R3 = R3 - R2; | |

R7 = R1; | |

R7 <<= R3; | |

R1 >>= R2; | |

R0 >>= R2; | |

R0 = R7 | R0; | |

R7 = P1; | |

RTS; | |

.Lsignhalf: | |

// Shifting right between 32 and 63 bits, so the top half | |

// will become all zero-bits, and the bottom half is some | |

// of the top half. But how much? | |

R2 = R2 - R3; | |

R0 = R1; | |

R0 >>= R2; | |

R1 = 0; | |

RTS; | |

ENDPROC(___lshftli) |