//*@@@+++@@@@******************************************************************
//
// Microsoft Windows Media
// Copyright (C) Microsoft Corporation. All rights reserved.
//
//*@@@---@@@@******************************************************************

#ifndef  _BASICOPS_H
#define _BASICOPS_H

#include "macros.h" 


// Overlay giving access to a 64-bit unsigned value as four 16-bit words.
// Members are listed in ascending address order, so the "least significant
// first" naming below only holds on a little-endian target with 16-bit shorts.
union short_longlong
{
  U64 ull;                 // the whole 64-bit value
  struct {
    unsigned short sll; // lowest word of the lower half
    unsigned short slm; // upper word of the lower half
    unsigned short sul; // lower word of the upper half
    unsigned short sum; // highest word of the upper half
  } s2ull;   // short view of the unsigned long long
};

// Signed counterpart of short_longlong: a 64-bit signed value viewed as four
// 16-bit words, with only the topmost word read as signed.
// The word order assumes a little-endian target with 16-bit shorts.
union ishort_longlong
{
  I64 ull;                 // the whole 64-bit signed value
  struct {
    unsigned short sll; // lowest word of the lower half
    unsigned short slm; // upper word of the lower half
    unsigned short sul; // lower word of the upper half
    signed   short sum; // highest word of the upper half (signed)
  } s2ull;   // short view of the signed long long
};

// Overlay giving access to a 64-bit unsigned value as one int followed by two
// 16-bit words. The layout assumes a little-endian target with a 32-bit int
// and 16-bit shorts.
union intshort_longlong
{
  U64 ull;                 // the whole 64-bit value
  struct {
    int            sll; // lower half as a single int
    unsigned short sul; // lower word of the upper half
    unsigned short sum; // highest word of the upper half
  } s2ull;   // int + short view of the unsigned long long
};

// Overlay giving access to a 64-bit unsigned value as two unsigned ints.
// "lw" is the low half only on a little-endian target with a 32-bit int.
union uint_longlong
{
  U64 ull;                          // the whole 64-bit value
  struct {
    unsigned int            lw; // lower half
    unsigned int            mw; // upper half
  } s2ull;   // unsigned int view of the unsigned long long
};

// Overlay giving access to a 32-bit unsigned value as two 16-bit words.
// "sl" is the low half only on a little-endian target with 16-bit shorts.
union short_long
{
  U32 ul;                  // the whole 32-bit value
  struct {
    unsigned short sl; // least significant word
    unsigned short sm; // most significant word
  } s2ul;   // short view of the unsigned long
};

// Signed counterpart of short_long: a 32-bit signed value viewed as an
// unsigned low word and a signed high word.
// The word order assumes a little-endian target with 16-bit shorts.
union ishort_long
{
  I32 ul;                  // the whole 32-bit signed value
  struct {
    unsigned short sl; // least significant word
    signed short sm; // most significant word (signed)
  } s2ul;   // short view of the signed long
};    


// Format description passed to div_032u_132u(), which combines the three
// fields as quotient_q + dividend_q - divisor_q - 1 to scale its result.
// NOTE(review): the fields look like fixed-point bit counts for each operand;
// confirm which side of the binary point they count against the callers.
typedef struct format_struct_t{
  I32 dividend_q;   // format value for the dividend
  I32 divisor_q;    // format value for the divisor
  I32 quotient_q;   // format value for the quotient
} format_struct_t;

// Pair of multiplier operands (one 64-bit, one 32-bit). Within this part of
// the file it is referenced only by a commented-out prototype of
// mac_3216u_1022u_426u.
typedef struct mult_struct_t{
  U64 mult1;   // 64-bit operand
  U32 mult2;   // 32-bit operand
} mult_struct_t;

// Upper-case aliases that forward straight to the lower-case routines.
// NOTE(review): the routine names appear to encode fixed-point formats as
// <operand1>_<operand2>_<result>, e.g. 032u = unsigned 0.32, 131i = signed
// 1.31; confirm against the individual routines before relying on it.
#define LOG2(x) my_log2(x)
#define SIGNBITS(x) signbits(x)
#define MULT_160I_032U_824I(x, y)	mult_160i_032u_824i(x, y)
#define MULT_160I_032U_626I(x, y)	mult_160i_032u_626i(x, y)
#define MULT_320U_320U_1814U(x,y)	mult_320u_320u_1814u(x,y)
#define MULT_131I_131I(x, y)		mult_131i_131i(x, y)
#define MULT_266I_032U_266I(x, y)	mult_266i_032u_266i(x, y)
#define MULT_320U_428U_320U(x, y)	mult_320u_428u_320u(x, y)
#define MULT_160U_428U_320U(x, y)	mult_160u_428u_320u(x, y) 
#define MULT_320U_131U_320U(x, y)	mult_320u_131u_320u(x, y)
#define MULT_160U_230U_284U(x, y)	mult_160u_230u_284u(x, y) 
#define MULT_032U_032U_032U(x, y)	mult_032u_032u_032u(x, y) 
#define MULT_032U_16I(x, y)		mult_032u_16i(x, y)
#define MULT_266U_032U_1616U(x, y)      mult_266u_032u_1616u(x, y)
#define MULT_032U_428U_032U(x, y)       mult_032u_428u_032u(x, y)
#define MULT_160U_131U_320U(x, y)       mult_160u_131u_320u(x, y)
#define MULT_131U_923U_131U(x, y)       mult_131u_923u_131u(x, y)
#define MULT_302U_032U_257U(x, y)       mult_302u_032u_257u(x, y)
#define ROUND16(x, y)                   round16(x, y)

// Asserts that x is unchanged by a conversion to int, i.e. that it fits.
// The argument is evaluated twice, so it must not have side effects.
#define OFLOW_CHECK( x ) assert( (int)(x) == (x) )

//Some common declarations 

/* Multiply routines needed by the "computeexc patrn" code (per the original
 * note). Only the prototypes appear here. */
INLINE U64 mult_1616u_1616u_3216u(U32 mult1,U32 mult2);
U64 mult_428u_3216u_337u(U32 mult1,U64 mult2);
U64 mult_626u_3216u_337u(U32 mult1,U64 mult2);



/* Multiply prototypes. Each comment records the routine's name or signature
 * under the earlier naming convention. */

/* formerly: U32 mult_U48_x_U32_res32(U64 mult1,U32 mult2) */
U32 mult_426u_032u_2210u(U64 mult1,U32 mult2);

/* formerly: I32 MULT_32_x_32_RES_32(I32 ix, I32 iy) */
I32 mult_131i_131i(I32 mult1, I32 mult2);

/* formerly: U32 MULT_uint_4dot28(U32 r0, U32 r1)  */
U32 mult_032u_428u_032u(U32 mult1,U32 mult2);

/* formerly: int mult_32s_32u(int num1,unsigned int num2) */
I32 mult_266i_032u_266i(I32 mult1,U32 mult2 );

/* 32-bit operations only.
 * formerly: uint mult_32u_16u_48(unsigned int num1,unsigned short num2) */
U32 mult_266u_160u_266u(U32 mult1, U16 mult2);


/* formerly: U32 MULT_cBits_iNmr(int cBits,U32 iNmr, U32 shift) */
U32 mult_320i_923u_scaled(I32 mult1, U32 mult2,U32 shift);


/* Base-2 logarithm helper; the LOG2() macro forwards to it. */
int my_log2(unsigned int x); 
/******** Multiply-accumulate (MAC) routines **********/
/* formerly: U64 mac_U48_x_U32_res48(U64 mult1, U32 mult2, U64 prv_result)  */
/* Superseded variant that took its operands through a mult_struct_t: */
//U64 mac_3216u_1022u_426u(U64 prv_result, mult_struct_t * mult_inps );
U64 mac_3216u_1022u_426u(U64 mult1, U32 mult2, U64 prv_result );

/* formerly: U64 MY_MAC_32_x_32_RES_64(I32 ix, I32 iy,U64 prevoutp) ; */
/* Original note: the second parameter is meant to become U32. */
U64 mac_256i_032u_3430u(I32 ix, I32 iy,U64 prevoutp);

/* Division, square-root and rounding prototypes. Comments of the form
 * "formerly: ..." record the routine's name under the earlier convention. */

/** Generic form.
 *  formerly: unsigned int div_U32_by_U32_gen(unsigned int r0,unsigned int r1,int r2 ,int r4,int r7) */
U32 div_032u_132u(U32 dividend, U32 divisor, format_struct_t * func_formats);

INLINE U32 div_U32_by_U16_gen(unsigned int r0,unsigned int r1_32,int r2,int r4, int r7);

/* formerly: div_32( )  */
INLINE I32 div_256i_132i(I32 dividend,I32 divisor);

/* formerly: int div_I32_by_I32( int r0, int r1) ; */
I32 div_320i_320i(I32 dividend, I32 divisor);

/* formerly: U32 new_div_32_by_32(U32 dividend, U32 divs); */
U32 div_032u_032u(U32 dividend,U32 divisor);

I32 div_32_new(I32 dividend,I32 divisor);

U32 div_320u_320u_131u(U32 divd  ,U32 divs);

U32 div_320u_320u_824u(U32 divd, U32 divs);
U32 div_320u_320u_527u(U32 divd, U32 divs);
U32 div_1616u_302u_428u(U32 r0 , U32 r1  );
U32 div_032u_032u_428u(U32 divd, U32 divs);
U32 div_311u_1418u_1814u(U32 r0, U32 r1  );
U32 div_626u_626u_230u(U32 r0  , U32 r1  );
U32 div_230u_248u_230u(U32 r0  , U32 r1  );
U32 div_320u_320u_1418u(U32 r0 , U32 r1  );

unsigned int div_1616u_923u_527u(unsigned int r0 , unsigned int r1  );
unsigned int div_320u_320u_131u_1(unsigned int r0, unsigned int r1  );
unsigned int div_1418u_1418u_032u(unsigned int r0, unsigned int r1  );
unsigned int div_266u_302u_923u(unsigned int r0  ,unsigned int r1_32);
unsigned int div_1319u_527u_824u(unsigned int r0 , unsigned int r1  );


/* Plain 32-bit by 32-bit unsigned divide used by the normalising wrappers. */
unsigned int div_U32_by_U32(unsigned int r0num,unsigned int r1);

int square_root(int x);
int longsignbits(U64 x);

/* formerly: short ROUND16(int num,unsigned int tempexp) */
I16 round16(I32 mult1, U32 exp); 

/**************Ramesh added here **************/

 /* Multiply prototypes from the section marked "Ramesh added here". */
 U64 mult_428u_3216u_2812u(U32 mult1,U64 mult2);
 U64 mult_626u_3216u_2812u(U32 mult1,U64 mult2);
 U32 mult_131u_2812u_266u(U32 mult1,U64 mult2);


 /*********************************************/

/* NOTE(review): the _asm suffix suggests an assembly implementation of
 * mult_824u_480i_480i; confirm where it is defined. */
U64 mult_824u_480i_480i_asm(U32 mult1,U64 mult2);




#ifndef WMA_TARGET_X86		//Linux build
/* Prototypes used when WMA_TARGET_X86 is not defined; the #else branch
 * supplies C definitions of these routines instead. */

/* formerly: I32 mult_16s_32u(U32 mult1,I16 mult2);  */
I32 mult_032u_16i(U32 mult1,I16 mult2);

/* formerly: U64 mult_40u_32u(U32 num1,U64 num2) */
U64 mult_131u_337u(U32 num1,U64 num2);

/* formerly: U32 mult_U32_X_U32_1(U32 mult1,U32 mult2) */
U32 mult_302u_032u_257u(U32 mult1,U32 mult2);

/* formerly: U32 mult_U32_X_U32_all(U32 mult1,U32 mult2) */
/* also known as MULT_32U_32U_32U( ) */

/* formerly: unsigned int mult_32u_32u(unsigned int num1,unsigned int num2) */
U32 mult_032u_032u_032u(U32 mult1,U32 mult2);

/* formerly: U64 MULT_48i_x_32i_RES_64(U32 mult1,U64 mult2)  */
U64 mult_824u_480i_480i(U32 mult1,U64 mult2);
#else
/*
 * Unsigned 32x32 multiply built from 16x16 partial products.
 * Returns the low 32 bits of (ix * iy) >> 17 (the name suggests two 32.0
 * operands giving an 18.14 result).
 */
INLINE U32 mult_320u_320u_1814u(U32 ix,U32 iy)
{
  union short_long lhs, rhs;
  U64 lhs_hi, lhs_lo, rhs_hi, rhs_lo;
  U64 high_term, mid_term;

  lhs.ul = ix;
  rhs.ul = iy;

  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  /* low*low contributes only its upper 16 bits to the cross-product sum */
  mid_term  = (lhs_lo * rhs_lo) >> 16;
  mid_term += lhs_hi * rhs_lo;
  mid_term += rhs_hi * lhs_lo;

  /* high*high has weight 2^32; after the overall >> 17 that is << 15 */
  high_term = (lhs_hi * rhs_hi) << 15;

  return (U32)(high_term + (mid_term >> 1));
}

/*
 * Multiplies a signed 16-bit value by an unsigned 32-bit value using two
 * 16x16 products: (ix * low half of iy) >> 8 plus (ix * high half of iy) << 8.
 * The name suggests 16.0 signed times 0.32 unsigned giving 8.24 signed.
 */
INLINE I32 mult_160i_032u_824i(I16 ix,U32 iy)
{
  union short_long parts;
  short multiplicand;
  I32 low_term, high_term;

  multiplicand = ix;
  parts.ul = iy;

  low_term = (I32)multiplicand * (U32)parts.s2ul.sl;
  low_term >>= 8;

  high_term = (I32)multiplicand * (U32)parts.s2ul.sm;
  high_term <<= 8;

  return high_term + low_term;
}

/*
 * Multiplies a signed 16-bit value by an unsigned 32-bit value using two
 * 16x16 products: (ix * low half of iy) >> 6 plus (ix * high half of iy) << 10.
 * The name suggests 16.0 signed times 0.32 unsigned giving 6.26 signed.
 */
INLINE I32 mult_160i_032u_626i(I16 ix, U32 iy)
{
  union short_long parts;
  short multiplicand;
  I32 low_term, high_term;

  multiplicand = ix;
  parts.ul = iy;

  low_term = (I32)multiplicand * (U32)parts.s2ul.sl;
  low_term >>= 6;

  high_term = (I32)multiplicand * (U32)parts.s2ul.sm;
  high_term <<= 10;

  return high_term + low_term;
}

/*int mult_16s_32u(unsigned int num1,short s11) */

#if !(defined (UNIFIED_TABLES))
/*
 * Multiplies an unsigned 32-bit value by a signed 16-bit value and returns
 * (mult1 * mult2) >> 16, built from two 16x16 products. The assert checks the
 * result against the same product computed in 64 bits.
 */
INLINE I32 mult_032u_16i(U32 mult1,I16 mult2)
{
  union short_long halves;
  I32 low_term, high_term;
  int result;

  halves.ul = mult1;

  low_term  = (I32)((I32)mult2 * (U32)halves.s2ul.sl) >> 16;
  high_term = (I32)((I32)mult2 * (U32)halves.s2ul.sm);
  result = low_term + high_term;

  assert( result == ((I64)mult1*mult2)>>16 );

  return result;
}
#else
/*
 * UNIFIED_TABLES variant: mult1 arrives as 1.31 and is first shifted left by
 * one bit to make it 0.32 (per the original note, WtInput was changed, so
 * 1.31 times 16.0 gives 16.16). It then returns (mult1 * mult2) >> 16 using
 * the shifted mult1, built from two 16x16 products and checked by the assert.
 */
INLINE I32 mult_032u_16i(U32 mult1,I16 mult2)
{
  union short_long halves;
  I32 low_term, high_term;
  int result;

  mult1 <<= 1;		/* 1.31 -> 0.32; the assembly does this more cleverly */
  halves.ul = mult1;

  low_term  = (I32)((I32)mult2 * (U32)halves.s2ul.sl) >> 16;
  high_term = (I32)((I32)mult2 * (U32)halves.s2ul.sm);
  result = low_term + high_term;

  assert( result == ((I64)mult1*mult2)>>16 );

  return result;
}   
#endif	//!UNIFIED_TABLES
/*
 * Multiplies a 32-bit value by a 64-bit value and returns the product shifted
 * right by 31 bits (modulo 2^64), using the two 32-bit halves of num2.
 * Format per the original note: 1.31 * 33.7 -> 34.38, >> 31 -> 33.7.
 */
INLINE U64 mult_131u_337u(U32 num1,U64 num2)
{
  union uint_longlong wide;
  U64 low_word, high_word;

  wide.ull = num2;
  low_word  = wide.s2ull.lw;
  high_word = wide.s2ull.mw;

  /* the high half has weight 2^32, so >> 31 leaves a << 1 */
  return ((high_word * num1) << 1L) + ((low_word * num1) >> 31L);
}        
            
/*
 * Unsigned 32x32 multiply built from 16x16 partial products; the original
 * note says the result is in 25.7 format (the name suggests 30.2 times 0.32).
 *
 * NOTE(review): the low*low product is shifted right by 11 before being added
 * to the cross products, whereas the sibling multiplies in this file shift it
 * by 16. That makes this routine differ slightly from (mult1 * mult2) >> 27.
 * Behaviour is kept as-is; confirm whether the 11 is intentional.
 */
INLINE U32 mult_302u_032u_257u(U32 mult1,U32 mult2)
{
  union short_long lhs, rhs;
  U64 lhs_hi, lhs_lo, rhs_hi, rhs_lo;
  U64 high_term, mid_term;

  lhs.ul = mult1;
  rhs.ul = mult2;

  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  mid_term  = (lhs_lo * rhs_lo) >> 11;
  mid_term += lhs_hi * rhs_lo;
  mid_term += lhs_lo * rhs_hi;
  mid_term >>= 11;

  high_term = (lhs_hi * rhs_hi) << 5;

  return (U32)(high_term + mid_term);
}             

/*
 * Unsigned 32x32 multiply built from 16x16 partial products.
 * Returns the upper 32 bits of the 64-bit product, i.e. (mult1 * mult2) >> 32
 * (0.32 times 0.32 giving 0.32, per the name).
 */
INLINE U32 mult_032u_032u_032u(U32 mult1,U32 mult2)
{
  union short_long lhs, rhs;
  U64 lhs_hi, lhs_lo, rhs_hi, rhs_lo;
  U64 mid_term;

  lhs.ul = mult1;
  rhs.ul = mult2;

  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  /* low*low only contributes its carry into the cross-product sum */
  mid_term  = (lhs_lo * rhs_lo) >> 16;
  mid_term += lhs_hi * rhs_lo;
  mid_term += lhs_lo * rhs_hi;

  return (U32)((lhs_hi * rhs_hi) + (mid_term >> 16));
}               

/*
 * Multiplies a 32-bit value by the low 48 bits of mult2 using 16x16 partial
 * products, and returns that product shifted right by 16 (modulo 2^64).
 * The top 16 bits of mult2 are ignored.
 */
INLINE U64 mult_824u_480i_480i(U32 mult1,U64 mult2)
{
  union short_long narrow;
  union short_longlong wide;
  U64 n_hi, n_lo;             /* halves of mult1 */
  U64 w_hi, w_mid, w_lo;      /* 16-bit words 2, 1, 0 of mult2 */
  U64 sum;

  narrow.ul = mult1;
  n_hi = narrow.s2ul.sm;
  n_lo = narrow.s2ul.sl;

  wide.ull = mult2;
  w_hi  = wide.s2ull.sul;
  w_mid = wide.s2ull.slm;
  w_lo  = wide.s2ull.sll;

  /* partial products by weight, each pre-divided by 2^16 */
  sum  = (n_hi * w_hi) << 32;                       /* weight 2^48 */
  sum += ((n_hi * w_mid) + (w_hi * n_lo)) << 16;    /* weight 2^32 */
  sum += (n_hi * w_lo) + (w_mid * n_lo);            /* weight 2^16 */
  sum += (n_lo * w_lo) >> 16;                       /* weight 2^0  */

  return sum;
}       

/*
 * Multiplies the low 48 bits of mult1 by a 32-bit value using 16x16 partial
 * products, and returns the low 32 bits of that product shifted right by 28.
 * The name suggests 42.6 times 0.32 giving 22.10.
 */
INLINE U32 mult_426u_032u_2210u(U64 mult1,U32 mult2)
{
  union short_longlong wide;
  union short_long narrow;
  U64 w_hi, w_mid, w_lo;      /* 16-bit words 2, 1, 0 of mult1 */
  U64 n_hi, n_lo;             /* halves of mult2 */
  U64 acc;

  wide.ull = mult1;
  w_hi  = wide.s2ull.sul;
  w_mid = wide.s2ull.slm;
  w_lo  = wide.s2ull.sll;

  narrow.ul = mult2;
  n_hi = narrow.s2ul.sm;
  n_lo = narrow.s2ul.sl;

  /* weights 2^16 and 2^0 combined, then scaled down */
  acc  = (((w_lo * n_hi) + (w_mid * n_lo)) << 16) + (w_lo * n_lo);
  acc >>= 28;

  /* weight 2^32 becomes << 4, weight 2^48 becomes << 20 */
  acc += ((w_mid * n_hi) + (w_hi * n_lo)) << 4;
  acc += (w_hi * n_hi) << 20;

  return (U32)acc;
}   

/*
 * Signed 32x32 multiply built from 16-bit halves (signed high half, unsigned
 * low half). Returns the low 32 bits of the product shifted right by 31,
 * i.e. 1.31 times 1.31 giving 1.31 per the name.
 */
INLINE I32 mult_131i_131i(I32 ix, I32 iy)
{
  union ishort_long lhs, rhs;
  short lhs_hi, rhs_hi;
  unsigned short lhs_lo, rhs_lo;
  I64 high_term, mid_term;

  lhs.ul = ix;
  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;

  rhs.ul = iy;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  /* low*low contributes only its upper 16 bits */
  mid_term = (U64)lhs_lo*(U64)rhs_lo;
  mid_term >>= 16;

  /* signed-high times unsigned-low cross products */
  mid_term += (I64)lhs_hi*(U64)rhs_lo;
  mid_term += (I64)rhs_hi*(U64)lhs_lo;
  mid_term >>= 15;

  /* high*high has weight 2^32; after the overall >> 31 that is << 1 */
  high_term = ((I64)lhs_hi*(I64)rhs_hi) << 1;

  return (I32)(high_term + mid_term); /* only the low 32 bits are meaningful */
}        

/*
 * Unsigned fixed-point multiply: returns the low 32 bits of (r0 * r1) >> 28.
 * The name suggests a 0.32 operand times a 4.28 operand giving 0.32.
 *
 * The product is formed directly in 64 bits. The earlier version built it
 * from 16x16 partial products but (a) summed the two cross products in 32-bit
 * arithmetic, dropping the carry, and (b) added the high*high product without
 * the << 4 that the sibling mult_320u_428u_320u applies. Both problems only
 * affected inputs whose upper 16-bit halves are both non-zero; for every
 * other input the result is unchanged.
 */
INLINE U32 mult_032u_428u_032u(U32 r0,U32 r1)
{
  const U64 product = (U64)r0 * (U64)r1;

  return (U32)(product >> 28);
}             

/*
 * Multiplies a signed 32-bit value by an unsigned 32-bit value using 16-bit
 * halves and 32-bit arithmetic only. The result is the product scaled down by
 * 2^32, with the low*low partial product left out (26.6 times 0.32 giving
 * 26.6, per the name).
 */
INLINE I32 mult_266i_032u_266i(I32 num1,U32 num2 )
{
  union ishort_long lhs;
  union short_long rhs;
  unsigned int lhs_lo, rhs_lo, rhs_hi;
  int lhs_hi;
  int acc;

  lhs.ul = num1;
  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;

  rhs.ul = num2;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  /* the two cross products, each reduced to its upper 16 bits */
  acc  = (U32)(rhs_hi*lhs_lo) >> 16;
  acc += (int)((int)lhs_hi*(U32)rhs_lo) >> 16;

  /* high*high is taken at full weight */
  acc += (int)(rhs_hi * lhs_hi);

  return acc;       
}


/*
 * Multiplies an unsigned 32-bit value by an unsigned 16-bit value using two
 * 16x16 products and returns the result truncated to 32 bits.
 */
INLINE U32 mult_266u_160u_266u(U32 num1, U16 num2)
{
  union short_long parts;
  unsigned int low_half, high_half;

  parts.ul = num1;
  high_half = parts.s2ul.sm;
  low_half  = parts.s2ul.sl;

  return ((num2 * high_half) << 16) + (num2 * low_half);
}

/*
 * Unsigned 32x32 multiply built from 16x16 partial products.
 * Returns the low 32 bits of (mult1 * mult2) >> 22.
 * Format per the original note: 26.6u * 0.32 -> 26.38, >> 22 -> 16.16.
 */
INLINE U32 mult_266u_032u_1616u(U32 mult1,U32 mult2)
{
  union short_long lhs, rhs;
  U64 lhs_hi, lhs_lo, rhs_hi, rhs_lo;
  U64 high_term, mid_term;

  lhs.ul = mult1;
  rhs.ul = mult2;

  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  mid_term  = (lhs_lo * rhs_lo) >> 16;
  mid_term += lhs_hi * rhs_lo;
  mid_term += lhs_lo * rhs_hi;
  mid_term >>= 6;

  /* high*high has weight 2^32; after the overall >> 22 that is << 10 */
  high_term = (lhs_hi * rhs_hi) << 10;

  return (U32)(high_term + mid_term);
}

/*
 * Unsigned 32x32 multiply built from 16x16 partial products.
 * Returns (mult1 * mult2) >> 25 as a 64-bit value; per the original note the
 * result is in 33.7 format.
 */
INLINE U64 mult_1616u_1616u_337u(U32 mult1,U32 mult2)
{
  union short_long lhs, rhs;
  U64 lhs_hi, lhs_lo, rhs_hi, rhs_lo;
  U64 high_term, mid_term;

  lhs.ul = mult1;
  rhs.ul = mult2;

  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  mid_term  = (lhs_lo * rhs_lo) >> 16;
  mid_term += lhs_hi * rhs_lo;
  mid_term += lhs_lo * rhs_hi;
  mid_term >>= 9;

  /* high*high has weight 2^32; after the overall >> 25 that is << 7 */
  high_term = (lhs_hi * rhs_hi) << 7;

  return high_term + mid_term;
}

/*
 * Multiplies a 32-bit value by the low 48 bits of mult2 using 16x16 partial
 * products, and returns that product shifted right by 26.
 * Format per the name: 6.26 * 33.7 -> 39.33, >> 26 -> 33.7.
 */
INLINE U64 mult_626u_337u_337u(U32 mult1,U64 mult2)
{
  union short_long narrow;
  union short_longlong wide;
  unsigned short n_hi, n_lo;          /* halves of mult1 */
  unsigned short w_hi, w_mid, w_lo;   /* 16-bit words 2, 1, 0 of mult2 */
  U64 low_part, high_part;

  narrow.ul = mult1;
  n_hi = narrow.s2ul.sm;
  n_lo = narrow.s2ul.sl;

  wide.ull = mult2;
  w_hi  = wide.s2ull.sul;
  w_mid = wide.s2ull.slm;
  w_lo  = wide.s2ull.sll;

  /* weights 2^0 and 2^16, expressed in units of 2^16 and then >> 10 */
  low_part  = (U64)((U32)n_lo * (U32)w_lo) >> 16L;
  low_part += (U64)((U32)w_mid * (U32)n_lo);
  low_part += (U64)((U32)w_lo * (U32)n_hi);
  low_part >>= 10L;

  /* weights 2^48 and 2^32, expressed in units of 2^32 and then << 6 */
  high_part  = (U64)((U32)w_hi * (U32)n_hi) << 16L;
  high_part += (U64)((U32)w_hi * (U32)n_lo);
  high_part += (U64)((U32)w_mid * (U32)n_hi);
  high_part <<= 6L;

  return low_part + high_part;
}

/*
 * Multiplies a 32-bit value by the low 48 bits of mult2 using 16x16 partial
 * products, and returns that product shifted right by 28.
 * Format per the original note: 33.7u * 4.28u -> 37.35, >> 28 -> 33.7.
 */
INLINE U64 mult_428u_337u_337u(U32 mult1,U64 mult2)
{
  union short_long narrow;
  union short_longlong wide;
  unsigned short n_hi, n_lo;          /* halves of mult1 */
  unsigned short w_hi, w_mid, w_lo;   /* 16-bit words 2, 1, 0 of mult2 */
  U64 low_part, high_part;

  narrow.ul = mult1;
  n_hi = narrow.s2ul.sm;
  n_lo = narrow.s2ul.sl;

  wide.ull = mult2;
  w_hi  = wide.s2ull.sul;
  w_mid = wide.s2ull.slm;
  w_lo  = wide.s2ull.sll;

  /* weights 2^0 and 2^16, expressed in units of 2^16 and then >> 12 */
  low_part  = (U64)((U32)n_lo * (U32)w_lo) >> 16L;
  low_part += (U64)((U32)w_mid * (U32)n_lo);
  low_part += (U64)((U32)w_lo * (U32)n_hi);
  low_part >>= 12L;

  /* weights 2^48 and 2^32, expressed in units of 2^32 and then << 4 */
  high_part  = (U64)((U32)w_hi * (U32)n_hi) << 16L;
  high_part += (U64)((U32)w_hi * (U32)n_lo);
  high_part += (U64)((U32)w_mid * (U32)n_hi);
  high_part <<= 4L;

  return low_part + high_part;
}

/*
 * Multiplies a 32-bit value by the low 48 bits of mult2 using 16x16 partial
 * products, and returns that product shifted right by 31.
 * Format per the original note: 33.7u * 1.31u -> 34.38, >> 31 -> 33.7.
 */
INLINE U64 mult_131u_337u_337u(U32 mult1,U64 mult2)
{
  union short_long narrow;
  union short_longlong wide;
  unsigned short n_hi, n_lo;          /* halves of mult1 */
  unsigned short w_hi, w_mid, w_lo;   /* 16-bit words 2, 1, 0 of mult2 */
  U64 low_part, high_part;

  narrow.ul = mult1;
  n_hi = narrow.s2ul.sm;
  n_lo = narrow.s2ul.sl;

  wide.ull = mult2;
  w_hi  = wide.s2ull.sul;
  w_mid = wide.s2ull.slm;
  w_lo  = wide.s2ull.sll;

  /* weights 2^0 and 2^16, expressed in units of 2^16 and then >> 15 */
  low_part  = (U64)((U32)n_lo * (U32)w_lo) >> 16L;
  low_part += (U64)((U32)w_mid * (U32)n_lo);
  low_part += (U64)((U32)w_lo * (U32)n_hi);
  low_part >>= 15L;

  /* weights 2^48 and 2^32, expressed in units of 2^32 and then << 1 */
  high_part  = (U64)((U32)w_hi * (U32)n_hi) << 16L;
  high_part += (U64)((U32)w_hi * (U32)n_lo);
  high_part += (U64)((U32)w_mid * (U32)n_hi);
  high_part <<= 1L;

  return low_part + high_part;
}

/*
 * Unsigned 32x32 multiply built from 16x16 partial products.
 * Returns (mult1 * mult2) >> 24 as a 64-bit value.
 * Format per the original note: 7.25u * 26.6 -> 33.31, >> 24 -> 33.7.
 */
INLINE U64 mult_725u_266u_337u(U32 mult1,U32 mult2)
{
  union short_long lhs, rhs;
  U64 lhs_hi, lhs_lo, rhs_hi, rhs_lo;
  U64 high_term, mid_term;

  lhs.ul = mult1;
  rhs.ul = mult2;

  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  mid_term  = (lhs_lo * rhs_lo) >> 16;
  mid_term += lhs_hi * rhs_lo;
  mid_term += lhs_lo * rhs_hi;
  mid_term >>= 8;

  /* high*high has weight 2^32; after the overall >> 24 that is << 8 */
  high_term = (lhs_hi * rhs_hi) << 8;

  return high_term + mid_term;
}

/*
 * Multiplies an unsigned 16-bit integer by an unsigned 32-bit value and
 * returns (r0_l * r1) >> 31 (16.0 times 1.31 giving an integer result, per
 * the original note).
 *
 * The product is formed in 64 bits. The earlier version multiplied two U16
 * values directly; assuming U16 is a 16-bit type, both operands promote to
 * int, so the product overflowed signed int (undefined behaviour) once it
 * reached 2^31. Results for inputs that did not overflow are unchanged.
 */
INLINE U32 mult_160u_131u_320u(U16 r0_l, U32 r1)
{
  const U64 product = (U64)r0_l * (U64)r1;

  return (U32)(product >> 31);
}

/*
 * Unsigned fixed-point multiply: returns the low 32 bits of (r0 * r1) >> 23
 * (1.31 times 9.23, >> 23, giving 1.31, per the original note).
 *
 * The product is formed directly in 64 bits. The earlier 16x16
 * partial-product version summed the two cross products in 32-bit arithmetic,
 * so a carry out of bit 31 was lost when both upper halves were large.
 * Results for inputs that did not overflow are unchanged.
 */
INLINE U32 mult_131u_923u_131u(U32 r0, U32 r1)
{
  const U64 product = (U64)r0 * (U64)r1;

  return (U32)(product >> 23);
}

/*
 * Multiplies an unsigned 16-bit integer by an unsigned 32-bit value and
 * returns (r0_l * r1) >> 28 (int times 4.28 giving an integer result, per the
 * original note).
 *
 * The product is formed in 64 bits. The earlier version multiplied two U16
 * values directly; assuming U16 is a 16-bit type, both operands promote to
 * int, so the product overflowed signed int (undefined behaviour) once it
 * reached 2^31. Results for inputs that did not overflow are unchanged.
 */
INLINE U32 mult_160u_428u_320u(U16 r0_l, U32 r1)
{
  const U64 product = (U64)r0_l * (U64)r1;

  return (U32)(product >> 28);
}

/*
 * Unsigned fixed-point multiply: returns the low 32 bits of (r0 * r1) >> 28
 * (uint times 4.28 giving a uint result, per the original note).
 *
 * The product is formed directly in 64 bits. The earlier 16x16
 * partial-product version summed the two cross products in 32-bit arithmetic,
 * so a carry out of bit 31 was lost when both upper halves were large. The
 * original note says callers pass r0 with a zero upper half; for such inputs
 * the result is unchanged.
 */
INLINE U32 mult_320u_428u_320u(U32 r0, U32 r1)
{
  const U64 product = (U64)r0 * (U64)r1;

  return (U32)(product >> 28);
}

/*
 * Unsigned fixed-point multiply: returns the low 32 bits of (r0 * r1) >> 31
 * (uint times 1.31 giving a uint result, per the original note).
 *
 * The product is formed directly in 64 bits. The earlier 16x16
 * partial-product version summed the two cross products in 32-bit arithmetic,
 * so a carry out of bit 31 was lost when both upper halves were large. The
 * original note says callers pass r0 with a zero upper half; for such inputs
 * the result is unchanged.
 */
INLINE U32 mult_320u_131u_320u(U32 r0, U32 r1)
{
  const U64 product = (U64)r0 * (U64)r1;

  return (U32)(product >> 31);
}

/*
 * Multiplies an unsigned 16-bit integer by an unsigned 32-bit value and
 * returns (r0_l * r1) >> 26 (ushort times 2.30, >> 26, giving 28.4, per the
 * original note).
 *
 * The product is formed in 64 bits. The earlier version multiplied two U16
 * values directly; assuming U16 is a 16-bit type, both operands promote to
 * int, so the product overflowed signed int (undefined behaviour) once it
 * reached 2^31. Results for inputs that did not overflow are unchanged.
 */
INLINE U32 mult_160u_230u_284u(U16 r0_l, U32 r1)
{
  const U64 product = (U64)r0_l * (U64)r1;

  return (U32)(product >> 26);
}

/*
 * Scales num by exp and rounds it to a 16-bit integer.
 * num is shifted right by (exp - 1), or left by 1 when exp is 0, then shifted
 * left by 10 to form a 16.16 value. The magnitude of that value is rounded
 * half-up on bit 15 and the sign of the scaled num is re-applied.
 */
INLINE I16 round16(I32 num, U32 exp)
{
  int fixed_16_16;
  unsigned int magnitude;
  unsigned int half_bit;
  short rounded;

  num = (exp < 1) ? (num << 1) : (num >> (exp - 1));

  fixed_16_16 = num << 10;
  magnitude = abs(fixed_16_16);
  half_bit = (magnitude & 0x8000) >> 15;   /* top fraction bit */
  rounded = (magnitude >> 16);             /* integer part */

  if (num < 0)
    rounded = -rounded - half_bit;
  else
    rounded = rounded + half_bit;

  return rounded;
}

// signbits() and its derivatives count the #
// of most significant bits that are unused.

// Counts the redundant sign bits of a signed 32-bit value: the number of bits
// below the sign bit that merely repeat it. Returns 31 for 0 and -1, and 0
// when bit 30 differs from the sign bit.
INLINE int signbits( I32 x )
{
    int n;

    // fold negatives onto the non-negative range (equivalent to ~x)
    if ( x < 0 )
        x = -(x+1);

    n = ( x != 0 ) ? 30 : 31;
    while ( ( x >>= 1 ) != 0 )
        --n;

    return n;
}

// Returns floor(log2(i)), i.e. n such that 2^n <= i <= 2^(n+1)-1, and -1
// when i is 0.
//
// The bit position is counted directly on the unsigned value. The earlier
// version computed 30 - signbits(i), which reinterprets the argument as
// signed and therefore returned wrong results for i >= 2^31 (for example -1
// for 0xFFFFFFFF). Results for smaller values, including 0, are unchanged.
INLINE int my_log2(unsigned int i)
{
  int iLog2 = -1;

  while (i != 0) {
    i >>= 1;
    ++iLog2;
  }
  return iLog2;
}

// Unsigned counterpart of signbits().
// n is the number of unused most-significant bits of x (32 for 0), but the
// function deliberately returns max(0, n-1): the callers appear to rely on
// the value being in the range of a signed integer. It would be best to
// correct the callers eventually and return n.
INLINE int usignbits( U32 x )
{
    int n = x ? 31 : 32;

    while ( x >>= 1 )
        --n;

    return max(0,n-1);
}


/*
 * Counts, from bit 38 downwards, how many consecutive bits of x equal
 * (x >> 39), and returns that count minus 8.
 *
 * Each bit is extracted by shifting x itself. The earlier version built the
 * mask with 1L << (39 - i), which shifts a long by up to 38 bits and is
 * undefined where long is 32 bits wide. Results are otherwise unchanged.
 */
INLINE int longsignbits(U64 x)
{
  const U64 stat = x >> 39;   /* reference value taken from bit 39 upwards */
  int count = 0;
  int bit;

  for (bit = 38; bit >= 0; bit--) {
    if (((x >> bit) & 1) != stat)
      break;
    count++;
  }
  return count - 8;
}


/*
 * Fixed-point square root.
 *
 * The input is normalised so that its top set bit sits at bit 30, a
 * polynomial with the coefficients in coeff_array is evaluated on it by
 * Horner's rule, and the result is rescaled with an entry of scale_array
 * chosen by the normalisation shift. Per the original notes the result is in
 * 9.23 format.
 *
 * Returns 0 for input <= 0. The earlier version only rejected 0: a negative
 * input skipped normalisation and then read scale_array[-1]. Normalisation is
 * now done on an unsigned copy, so it no longer relies on overflowing a
 * signed left shift. Results for positive inputs are unchanged.
 */
INLINE int square_root(int input) 
{
  union short_long m1;
  union ishort_long m2;
  union ishort_longlong m3;
  int shift=-1,i,reg32,r0;
  unsigned  int norm_input;
  short r0_h,r1_h;
  unsigned short r0_lu,r1_lu;
  I64 a0,a1;

  /* polynomial coefficients, 2.30 format */
  static const int coeff_array[6]={0x072d0010,0xddaa0000,0x46d60200,
                                   0xa9ecfe80,0x5d1d0000,0x0d490020};

  /* rescaling factors indexed by the normalisation shift (0..30); entry 15
     is a placeholder because shift == 15 needs no rescaling */
  static const int scale_array[31]={ 0x5a827900,0x3fffff80,0x2d413c80,0x1fffffc0,
                               0x16a09e40,0xfffffe0 ,0xb504f20 ,0x7fffff0 ,
                               0x5a82790 ,0x3fffff8 , 		// 9.23 format
                               0x2d413c8 ,0x1fffffc ,0x16a09e4 ,0xffffff,
                               0xb504f3 , 0x0, 0x2d413cc0,0x1fffffe0,0x16a09e40,
                               0xfffffe0 ,0xb504f20,0x7fffff0 ,0x5a82790 ,
                               0x3fffff8 ,0x2d413c8 ,0x1fffffc, // 2.30 format
			       0x16a09e4 ,0xfffffe  ,0xb504f2  ,0x7fffff  ,0x5a8279 };

  if(input <= 0)
    return 0;

  /* shift left until bit 31 is set, counting the shifts (shift ends 0..30) */
  norm_input = (unsigned int)input;
  while((norm_input & 0x80000000u) == 0) {
    shift++;
    norm_input <<= 1;
  }
  norm_input >>=1;      /* back to a non-negative value with bit 30 set */

  m1.ul = norm_input;
  r0_h  = m1.s2ul.sm;
  r0_lu = m1.s2ul.sl;

  m2.ul = coeff_array[0];
  r1_h  = m2.s2ul.sm;
  r1_lu = m2.s2ul.sl;

  /* Horner evaluation: acc = acc * x + coeff[i], using 16-bit halves */
  for (i=1;i<=5;i++) {
    a0 = (((I64)r0_h * (I64) r1_h)<<1L);
    a1 = (I64)r0_h*(U64)r1_lu;
    a1 += (I64)r1_h*(U64)r0_lu;

    a1 = (I64)(a1>>15L);
    a0 += a1;                    // a0 in 2.30 format

    reg32 = coeff_array[i];
    a1 = reg32;                // reg32 in 2.30 format
    a0 += a1;

    m3.ull = a0;
    r1_h  = m3.s2ull.slm;
    r1_lu = m3.s2ull.sll;
  }

  if (shift == 15) {
    a0 =(I64)(a0 >> 7L);
    r0= (int)(a0);
    return r0;
  }

  shift -= 15;

  m2.ul  = scale_array[15+shift];
  r0_h  = m2.s2ul.sm;
  r0_lu = m2.s2ul.sl;

  /* multiply the polynomial result by the scale factor */
  a0 = (((I64)r0_h * (I64) r1_h)<<1L);
  a1 = (I64)r0_h*(U64)r1_lu;
  a1 += (I64)r1_h*(U64)r0_lu;
  if(shift>0) {
    a1 = (I64)(a1>>15L);
    a0+= a1;
    a0 = (I64)(a0>>6L);
  } else {
    a1 = (I64)(a1>>14L);
    a0+= a1;
    a0 = (I64)(a0<<1L);
  }

  r0= (int)(a0 ); // 9.23 format
  return r0;
}

/*
 * Multiply-accumulate built from 16x16 partial products.
 * Multiplies the low 48 bits of mult1 by mult2, shifts the product right by
 * 32, and adds the low 48 bits of prv_result. The top 16 bits of mult1 and
 * of prv_result are ignored.
 */
INLINE U64 mac_3216u_1022u_426u(U64 mult1, U32 mult2, U64 prv_result )
{
  union short_longlong wide;
  union short_long narrow;
  union intshort_longlong prev;
  U64 w_hi, w_mid, w_lo;      /* 16-bit words 2, 1, 0 of mult1 */
  U64 n_hi, n_lo;             /* halves of mult2 */
  U32 prev_low;               /* bits 0-31 of prv_result */
  unsigned short prev_high;   /* bits 32-47 of prv_result */
  U64 acc;

  wide.ull = mult1;
  w_hi  = wide.s2ull.sul;
  w_mid = wide.s2ull.slm;
  w_lo  = wide.s2ull.sll;

  narrow.ul = mult2;
  n_hi = narrow.s2ul.sm;
  n_lo = narrow.s2ul.sl;

  prev.ull  = prv_result;
  prev_low  = prev.s2ull.sll;
  prev_high = prev.s2ull.sul;

  /* weights 2^16 and 2^0 only survive as the carry into bit 32 */
  acc  = (((w_lo * n_hi) + (w_mid * n_lo)) << 16) + (w_lo * n_lo);
  acc >>= 32;

  /* weight 2^32 lands at bit 0, weight 2^48 at bit 16 */
  acc += (w_mid * n_hi) + (w_hi * n_lo);
  acc += (U64)prev_low;
  acc += (w_hi * n_hi) << 16;
  acc += (U64)prev_high << 32;

  return acc;
}        


/*
 * Signed multiply-accumulate built from 16-bit halves (signed high half,
 * unsigned low half, for both operands). The product is scaled down by 2^26
 * and added to prevoutp. Per the original note the 64-bit result can be made
 * more accurate if needed.
 */
INLINE U64 mac_256i_032u_3430u(I32 ix, I32 iy,U64 prevoutp)
{
  union ishort_long lhs, rhs;
  short lhs_hi, rhs_hi;
  unsigned short lhs_lo, rhs_lo;
  I64 high_term, mid_term;
  U64 result;

  lhs.ul = ix;
  lhs_hi = lhs.s2ul.sm;
  lhs_lo = lhs.s2ul.sl;

  rhs.ul = iy;
  rhs_hi = rhs.s2ul.sm;
  rhs_lo = rhs.s2ul.sl;

  /* low*low contributes only its upper 16 bits */
  mid_term = (U64)lhs_lo*(U64)rhs_lo;
  mid_term = (U64)mid_term >> 16L;

  /* signed-high times unsigned-low cross products */
  mid_term += (I64)lhs_hi*(U64)rhs_lo;
  mid_term += (I64)rhs_hi*(U64)lhs_lo;
  mid_term >>= 10L;

  /* high*high: << 1 then << 5, i.e. weight 2^32 reduced by 2^26 */
  high_term = ((I64)lhs_hi*(I64)rhs_hi) << 1L;
  high_term <<= 5L;

  high_term += mid_term;
  high_term += (I64)prevoutp;

  result = high_term;
  return result;
}      

/*
 * Normalising wrapper around div_U32_by_U32().
 * Both operands are shifted left by their SIGNBITS() count (the divisor by
 * one more), divided, and the quotient is rescaled by
 * 1 - (dividend exponent - divisor exponent). A rescale shift above 31
 * yields 0. Formats per the original notes: 1.31 dividend, 29.3 divisor.
 */
INLINE U32 div_131u_293u_131u(U32 divd,U32 divs)
{
  const int divd_bits = SIGNBITS(divd);
  const int divs_bits = SIGNBITS(divs);
  const int divd_exp = 31 - divd_bits -31 +1;   /* 1.31 dividend */
  const int divs_exp = 31 - divs_bits -3;       /* 29.3 divisor  */
  const int rescale = 1 - (divd_exp - divs_exp);
  U32 quotient = div_U32_by_U32(divd << divd_bits, divs << (divs_bits + 1));

  if (rescale > 31)
    return 0;
  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}    

/*
 * Normalising wrapper around div_U32_by_U32().
 * Both operands are shifted left by their SIGNBITS() count (the divisor by
 * one more), divided, and the quotient is rescaled by
 * 1 - (dividend exponent - divisor exponent). A rescale shift above 31
 * yields 0.
 * NOTE(review): the exponent arithmetic is identical to div_131u_293u_131u
 * (1.31 dividend, 29.3 divisor), which does not match this routine's name;
 * confirm the intended formats.
 */
INLINE U32 div_032u_032u(U32 divd,U32 divs)
{
  const int divd_bits = SIGNBITS(divd);
  const int divs_bits = SIGNBITS(divs);
  const int divd_exp = 31 - divd_bits -31 +1;
  const int divs_exp = 31 - divs_bits -3;
  const int rescale = 1 - (divd_exp - divs_exp);
  U32 quotient = div_U32_by_U32(divd << divd_bits, divs << (divs_bits + 1));

  if (rescale > 31)
    return 0;
  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}

   
/*
 * Normalising wrapper around div_U32_by_U32() for two integer operands.
 * Both operands are shifted left by their SIGNBITS() count (the divisor by
 * one more), divided, and the quotient is rescaled by
 * 1 - (dividend exponent - divisor exponent). A rescale shift above 31
 * yields 0. The name suggests a 1.31 result.
 */
INLINE U32 div_320u_320u_131u(U32 divd,U32 divs)
{
  const int divd_bits = SIGNBITS(divd);
  const int divs_bits = SIGNBITS(divs);
  const int divd_exp = 31 - divd_bits  + 1;   /* integer dividend */
  const int divs_exp = 31 - divs_bits;        /* integer divisor  */
  const int rescale = 1 - (divd_exp - divs_exp);
  U32 quotient = div_U32_by_U32(divd << divd_bits, divs << (divs_bits + 1));

  if (rescale > 31)
    return 0;
  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}

/*
 * Divides dividend by a0 through div_U32_by_U32() after normalising both with
 * usignbits(), then rescales the quotient; per the original note the result
 * is in 14.18 format. A rescale shift above 31 yields 0.
 * Exponents per the original notes: 32.0 dividend, 37.0 divisor.
 */
INLINE U32 divide_PowerbySlice(U32 dividend,U32 a0)
{
  /* usignbits() reports the unused leading bits of each operand */
  const int num_bits = usignbits(dividend);
  const int den_bits = usignbits(a0);
  const int num_exp = 31 - num_bits ;            /* 32.0 dividend */
  const int den_exp = 31 - (den_bits)-1 + 3 ;    /* 37.0 divisor  */
  const int rescale = 14 - (num_exp - den_exp);  /* result in 14.18 */
  U32 quotient = div_U32_by_U32(dividend << num_bits, a0 << (den_bits + 1));

  if (rescale > 31)
    return 0;
  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
} 

INLINE unsigned int div_1616u_923u_527u(unsigned int r0, unsigned int r1)
{

  int i;
  unsigned int r3=0,r2=1,r4,r5;
  U64 a0 = r0;

  r4 = (U32) a0;                //dividend
  r5 = r1;                //divisor


  for (i=0;i<32;i++) {
    if ((U64)r1 <= a0) {
      a0 = a0 - (U64) r1;   // if par.result >= divisor
      r3 = r3 + r2;   // set LSB
    }
    r3 <<= 1;                       // pre result << by 1
    a0 <<= 1L;
  }
  r0 = r3;
  return r0;
}

INLINE unsigned int div_1418u_1418u_032u(unsigned int r0, unsigned int r1)
{

  int i;
  unsigned int r3=0,r2=1,r4,r5;
  U64 a0 = r0;

  r4 = (U32) a0;                //dividend
  r5 = r1;                //divisor


  for (i=0;i<32;i++) {
    if ((U64)r1 <= a0) {
      a0 = a0 - (U64) r1;   // if par.result >= divisor
      r3 = r3 + r2;   // set LSB
    }
    r3 <<= 1;                       // pre result << by 1
    a0 <<= 1L;
  }
  r0 = r3;
  return r0;
}

INLINE unsigned int div_1319u_527u_824u(unsigned int r0, unsigned int r1)
{

  int i;
  unsigned int r3=0,r2=1,r4,r5;
  U64 a0 = r0;

  r4 = (U32) a0;                //dividend
  r5 = r1;                //divisor


  for (i=0;i<32;i++) {
    if ((U64)r1 <= a0) {
      a0 = a0 - (U64) r1;   // if par.result >= divisor
      r3 = r3 + r2;   // set LSB
    }
    r3 <<= 1;                       // pre result << by 1
    a0 <<= 1L;
  }
  r0 = r3;
  return r0;
}

INLINE unsigned int div_320u_320u_131u_1(unsigned int r0, unsigned int r1)
{

  int i;
  unsigned int r3=0,r2=1,r4,r5;
  U64 a0 = r0;

  r4 = (U32) a0;                //dividend
  r5 = r1;                //divisor


  for (i=0;i<32;i++) {
    if ((U64)r1 <= a0) {
      a0 = a0 - (U64) r1;   // if par.result >= divisor
      r3 = r3 + r2;   // set LSB
    }
    r3 <<= 1;                       // pre result << by 1
    a0 <<= 1L;
  }
  r0 = r3 >> 1;
  return r0;
}

/*
 * Normalised shift-and-subtract division.
 * r0 is shifted left by usignbits(r0) and r1 by usignbits(r1) + 1, a 32-step
 * division is run on the normalised values, and the quotient is rescaled by
 * usignbits(r0) + 23 - usignbits(r1) bits: right when that is non-negative,
 * left otherwise. Returns 0 without dividing when the shift exceeds 31.
 */
INLINE U32  div_230u_248u_230u(U32 r0, U32 r1 )
{
  U64 remainder, divisor;
  unsigned int den_bits;
  U32 quotient = 0;
  int num_bits, rescale, step;

  num_bits = usignbits(r0);
  den_bits = usignbits(r1);

  remainder = (U64)(r0 << num_bits);
  divisor   = (U64)(r1 << (den_bits+1));

  rescale = num_bits + 23 - den_bits;
  if (rescale > 31)
    return 0;

  for (step = 0; step < 32; step++) {
    if (divisor <= remainder) {
      remainder -= divisor;
      quotient += 1;          /* set the current quotient bit */
    }
    quotient <<= 1;
    remainder <<= 1L;
  }

  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}
 
/*
 * Normalised shift-and-subtract division.
 * r0 is shifted left by usignbits(r0) and r1 by usignbits(r1) + 1, a 32-step
 * division is run on the normalised values, and the quotient is rescaled by
 * usignbits(r0) + 13 - usignbits(r1) bits: right when that is non-negative,
 * left otherwise. Returns 0 without dividing when the shift exceeds 31.
 */
INLINE U32  div_320u_320u_1418u(U32 r0, U32 r1 )
{
  U64 remainder, divisor;
  unsigned int den_bits;
  U32 quotient = 0;
  int num_bits, rescale, step;

  num_bits = usignbits(r0);
  den_bits = usignbits(r1);

  remainder = (U64)(r0 << num_bits);
  divisor   = (U64)(r1 << (den_bits+1));

  rescale = num_bits + 13 - den_bits;
  if (rescale > 31)
    return 0;

  for (step = 0; step < 32; step++) {
    if (divisor <= remainder) {
      remainder -= divisor;
      quotient += 1;          /* set the current quotient bit */
    }
    quotient <<= 1;
    remainder <<= 1L;
  }

  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}

/*
 * Normalised shift-and-subtract division.
 * r0 is shifted left by usignbits(r0) and r1 by usignbits(r1) + 1, a 32-step
 * division is run on the normalised values, and the quotient is rescaled by
 * usignbits(r0) - usignbits(r1) bits: right when that is non-negative, left
 * otherwise. Returns 0 without dividing when the shift exceeds 31.
 */
INLINE U32  div_311u_1418u_1814u(U32 r0, U32 r1 )
{
  U64 remainder, divisor;
  unsigned int den_bits;
  U32 quotient = 0;
  int num_bits, rescale, step;

  num_bits = usignbits(r0);
  den_bits = usignbits(r1);

  remainder = (U64)(r0 << num_bits);
  divisor   = (U64)(r1 << (den_bits+1));

  rescale = num_bits - den_bits;
  if (rescale > 31)
    return 0;

  for (step = 0; step < 32; step++) {
    if (divisor <= remainder) {
      remainder -= divisor;
      quotient += 1;          /* set the current quotient bit */
    }
    quotient <<= 1;
    remainder <<= 1L;
  }

  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}
 
/*
 * Normalised shift-and-subtract division.
 * r0 is shifted left by usignbits(r0) and r1 by usignbits(r1) + 1, a 32-step
 * division is run on the normalised values, and the quotient is rescaled by
 * usignbits(r0) - usignbits(r1) + 1 bits: right when that is non-negative,
 * left otherwise. Returns 0 without dividing when the shift exceeds 31.
 */
INLINE U32 div_626u_626u_230u(U32 r0, U32 r1 )
{
  U64 remainder, divisor;
  unsigned int den_bits;
  U32 quotient = 0;
  int num_bits, rescale, step;

  num_bits = usignbits(r0);
  den_bits = usignbits(r1);

  remainder = (U64)(r0 << num_bits);
  divisor   = (U64)(r1 << (den_bits+1));

  rescale = num_bits - den_bits +1 ;
  if (rescale > 31)
    return 0;

  for (step = 0; step < 32; step++) {
    if (divisor <= remainder) {
      remainder -= divisor;
      quotient += 1;          /* set the current quotient bit */
    }
    quotient <<= 1;
    remainder <<= 1L;
  }

  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}

/*
 * Normalised shift-and-subtract division.
 * r0 is shifted left by usignbits(r0) and r1 by usignbits(r1) + 1, a 32-step
 * division is run on the normalised values, and the quotient is rescaled by
 * usignbits(r0) + 17 - usignbits(r1) bits: right when that is non-negative,
 * left otherwise. Returns 0 without dividing when the shift exceeds 31.
 */
INLINE U32  div_1616u_302u_428u(U32 r0, U32 r1)
{
  U64 remainder, divisor;
  unsigned int den_bits;
  U32 quotient = 0;
  int num_bits, rescale, step;

  num_bits = usignbits(r0);
  den_bits = usignbits(r1);

  remainder = (U64)(r0 << num_bits);
  divisor   = (U64)(r1 << (den_bits+1));

  rescale = num_bits + 17 - den_bits;
  if (rescale > 31)
    return 0;

  for (step = 0; step < 32; step++) {
    if (divisor <= remainder) {
      remainder -= divisor;
      quotient += 1;          /* set the current quotient bit */
    }
    quotient <<= 1;
    remainder <<= 1L;
  }

  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}
 
/*
 * Generic normalised shift-and-subtract division.
 * Works like the fixed-format dividers in this file, but takes the rescale
 * offset from func_formats as quotient_q + dividend_q - divisor_q - 1. The
 * quotient is shifted by usignbits(r0) + offset - usignbits(r1) bits: right
 * when that is non-negative, left otherwise. Returns 0 without dividing when
 * the shift exceeds 31.
 */
INLINE U32  div_032u_132u(U32 r0, U32 r1, format_struct_t * func_formats)
{
  U64 remainder, divisor;
  unsigned int den_bits;
  U32 quotient = 0;
  int num_bits, offset, rescale, step;

  offset = func_formats->quotient_q + func_formats->dividend_q;
  offset = offset - func_formats->divisor_q;
  offset = offset - 1;

  num_bits = usignbits(r0);
  den_bits = usignbits(r1);

  remainder = (U64)(r0 << num_bits);
  divisor   = (U64)(r1 << (den_bits+1));

  rescale = num_bits + offset;
  rescale = rescale - den_bits;
  if (rescale > 31)
    return 0;

  for (step = 0; step < 32; step++) {
    if (divisor <= remainder) {
      remainder -= divisor;
      quotient += 1;          /* set the current quotient bit */
    }
    quotient <<= 1;
    remainder <<= 1L;
  }

  if (rescale < 0)
    return quotient << abs(rescale);
  return quotient >> rescale;
}
 
/*
 * Signed division built on the unsigned div_U32_by_U16_gen().
 * The magnitudes are divided with the format arguments (6, 29, 25) and the
 * quotient is negated when exactly one operand is negative.
 */
INLINE I32 div_256i_302i_256i(I32 dividend,I32 divisor)
{
    const int negate = (dividend < 0) != (divisor < 0);
    U32 num_mag, den_mag, quot_mag;

    num_mag = abs(dividend);
    den_mag = abs(divisor);

    quot_mag = div_U32_by_U16_gen(num_mag,den_mag,6,29,25);

    return negate ? -((I32)quot_mag) : (I32)quot_mag;
}


/*
 * Divides r0 by r1_32 with a 16-step bit-serial divide on a 16-bit divisor.
 * Returns 0 when the divisor is 0 or when the final rescale shift exceeds 31.
 * NOTE(review): the "DIVQ" step looks like an emulation of a DSP divide
 * primitive and the name suggests 26.6 / 30.2 -> 9.23; confirm both.
 */
INLINE unsigned int div_266u_302u_923u(unsigned int r0,unsigned int r1_32)
{

    int i,AQ=0;             // AQ: flag carried between divide steps
    unsigned int rr;
    int r5,r6;
    unsigned short r1;      // top 16 bits of the normalised divisor
		if (r1_32 == 0)
			return 0;

    // halve both operands before normalising
    r0    >>= 1;
    r1_32 >>= 1; 
    
    // normalise: shift out the unused leading bits reported by usignbits()
    r5 = usignbits(r0);
    r6 = usignbits(r1_32);

    r0   = r0 << r5 ;
    r1_32   = r1_32 << r6 ;

    // rescale shift for the quotient: r6 - (r5 - 4)
    r5 = r5-4 ;
    r5 = r6-r5;

    if(r5>31)
    {
        r0=0;
        goto down;
    }

    // keep only the upper 16 bits of the normalised divisor
    r1_32   = r1_32 >> 16 ;
    r1 = (unsigned short)r1_32;

    for(i=0;i<16;i++)
    {
        // DIVQ
        // The upper half of r0 is the partial remainder and the lower half
        // collects quotient bits. The divisor is added back when AQ is set
        // and subtracted otherwise.
        if(AQ==1)
        {
            rr = (((r0&0xFFFF0000)>>16)+(r1&0xFFFF))<<16;
            AQ =  ((rr&0x80000000)>>31)^((r1&0x8000)>>15);
            rr = rr << 1;
            rr = rr+ ((r0&0xFFFF)<<1)+ (!(AQ&0x1));
        }
        else
        {
            rr = (((r0&0xFFFF0000)>>16)-(r1&0xFFFF))<<16;
            AQ=  ((rr&0x80000000)>>31)^((r1&0x8000)>>15);
            rr = rr <<1;
            rr = rr+ ((r0&0xFFFF)<<1)+ (!(AQ&0x1));
        }
        r0 = rr;
    }

    // keep only the 16 quotient bits
    r0 <<= 16;
    r0 >>= 16;

    // apply the rescale shift: right for negative r5, left otherwise
    if(r5<0)
    r0 = r0 >> abs(r5) ;

    else
    r0 = r0 << r5 ;
    down:
    return r0;

}


/////////////////////////////////////////////////////////////////////////////////
/*
 * div_U32_by_U16_gen -- unsigned fixed-point divide, generic in the operand
 * formats.
 *
 * Parameters:
 *   r0     dividend (32 bits)
 *   r1_32  divisor; after normalisation only its upper 16 bits take part in
 *          the division
 *   r2     n_dividend: n of the m:n format of the dividend
 *   r4     n_divisor : n of the m:n format of the divisor
 *   r7     m_quotient: m of the m:n format of the quotient
 *
 * Returns the quotient in the requested format.  The result is 0 when the
 * divisor is 0, when the quotient would need a left shift of more than 31
 * bits, or when the required right shift is 32 bits or more.
 *
 * Method:
 *   Both operands are shifted left by the counts reported by usignbits()
 *   (dsign for the dividend, ssign for the divisor).  A 16-step conditional
 *   add/subtract loop leaves 16 quotient bits in the low half of r0, which
 *   are then moved into the requested format:
 *       dexp  = 31 - dsign - n_dividend + 1
 *       sexp  = 15 - ssign - n_divisor
 *       shift = m_quotient - dexp + sexp
 *   The code carries -shift in r5, so a negative r5 means "shift right" and
 *   a non-negative r5 means "shift left".
 *
 * NOTE(review): usignbits() is defined elsewhere; if it can return 32 for a
 * zero operand the normalising shift of a zero dividend is undefined --
 * confirm.
 */
INLINE U32 div_U32_by_U16_gen(unsigned int r0,unsigned int r1_32,int r2,int r4, int r7)
{
    int i,AQ=0;
    unsigned int rr;
    int r5,r6;
    unsigned short r1;

    if (r1_32 == 0)
        return 0;

    /* Format-dependent part of the final shift. */
    r7 = r7+r2-r4-17;

    r5 = usignbits(r0);
    r6 = usignbits(r1_32);

    r0    <<= r5;
    r1_32 <<= r6;

    /* r5 = -shift (see header comment). */
    r5 = r6 - (r5 + r7);

    if (r5 > 31)
        return 0;

    r1 = (unsigned short)(r1_32 >> 16);

    for (i = 0; i < 16; i++)
    {
        /* DIVQ: add the divisor when the previous step set AQ, else subtract. */
        if (AQ == 1)
            rr = (((r0&0xFFFF0000)>>16) + (r1&0xFFFF)) << 16;
        else
            rr = (((r0&0xFFFF0000)>>16) - (r1&0xFFFF)) << 16;

        /* AQ = top bit of the partial result XOR top bit of the divisor. */
        AQ = ((rr&0x80000000)>>31) ^ ((r1&0x8000)>>15);

        /* Shift left one bit and append the new quotient bit. */
        r0 = (rr << 1) + ((r0&0xFFFF)<<1) + (!(AQ&0x1));
    }

    /* Keep only the quotient bits held in the lower half. */
    r0 <<= 16;
    r0 >>= 16;

    if (r5 < -31)
        r0 = 0;          /* shifting by >= 32 is undefined; all bits are gone anyway */
    else if (r5 < 0)
        r0 >>= -r5;
    else
        r0 <<= r5;

    return r0;
}


/*
 * Signed fixed-point divide built on div_U32_by_U32().
 *
 * The operands are split into sign and magnitude.  Each magnitude is shifted
 * left by the count SIGNBITS() reports for it (the divisor by one bit more),
 * the shifted values are divided by div_U32_by_U32(), and the raw quotient is
 * rescaled by the difference of the two exponents followed by a fixed right
 * shift of 2.  The result is negated when exactly one operand is negative.
 *
 *   dividend  signed numerator
 *   divisor   signed denominator
 *
 * Returns the signed quotient.
 *
 * NOTE(review): a zero divisor is not checked here, and `ssign + 1` would be
 * an out-of-range shift if SIGNBITS() can return 31 -- confirm how
 * signbits() and div_U32_by_U32() behave for such inputs.
 */
INLINE I32 div_256i_132i(I32 dividend,I32 divisor)
{
  U32 udividend,udivisor,uquotient;
  int dsign,ssign,dexp,sexp,exp1;
  U64 u64quotient;

  /* The quotient is negative iff the operand signs differ. */
  const int negate = (dividend < 0) != (divisor < 0);

  /*
   * Magnitudes are formed in unsigned arithmetic: abs() is undefined for the
   * most negative I32, whereas unsigned negation wraps to the correct value.
   */
  udividend = (dividend < 0) ? ((U32)0 - (U32)dividend) : (U32)dividend;
  udivisor  = (divisor  < 0) ? ((U32)0 - (U32)divisor)  : (U32)divisor;

  dsign = SIGNBITS(udividend);
  ssign = SIGNBITS(udivisor);

  dexp = 31 - dsign + 1;
  sexp = 31 - ssign;

  udividend <<= dsign;
  udivisor  <<= (ssign + 1);

  uquotient = div_U32_by_U32(udividend,udivisor);

  /* Undo the normalisation: positive exponent scales up, negative down. */
  exp1 = dexp - sexp;
  if (exp1 < 0)
    u64quotient = uquotient >> -exp1;
  else
    u64quotient = (U64)uquotient << exp1;

  uquotient = (U32)(u64quotient >> 2);

  return negate ? -((I32)uquotient) : (I32)uquotient;
}


/*
 * Multiply two unsigned 32-bit values from 16x16-bit partial products and
 * return the product shifted right by 16 bits.  For two 16.16 operands the
 * result therefore carries 16 fractional bits (32.16, as in the name).
 *
 *   mult1, mult2  unsigned multiplicands
 *
 * Only the low*low partial product loses bits in the shift; the other
 * partial products are multiples of 2^16, so the value equals the full
 * 64-bit product >> 16.
 *
 * The 16-bit halves are extracted with shifts and masks so the result does
 * not depend on the byte order of the target.
 */
INLINE U64 mult_1616u_1616u_3216u(U32 mult1,U32 mult2)
{
  const unsigned short m1_hi = (unsigned short)((mult1 >> 16) & 0xFFFF);
  const unsigned short m1_lo = (unsigned short)(mult1 & 0xFFFF);
  const unsigned short m2_hi = (unsigned short)((mult2 >> 16) & 0xFFFF);
  const unsigned short m2_lo = (unsigned short)(mult2 & 0xFFFF);
  U64 low_sum, high_part;

  /* weight 2^0 term, reduced to weight 2^16 */
  low_sum  = ((U64)m1_lo * (U64)m2_lo) >> 16;

  /* weight 2^16 cross terms */
  low_sum += (U64)m1_hi * (U64)m2_lo;
  low_sum += (U64)m1_lo * (U64)m2_hi;

  /* weight 2^32 term, expressed in units of 2^16 */
  high_part = ((U64)m1_hi * (U64)m2_hi) << 16;

  return high_part + low_sum;
}

/*
 * Multiply a 32-bit value by the low 48 bits of a 64-bit value using
 * 16x16-bit partial products, with a net right shift of 37 bits.
 * Per the name: 4.28 * 32.16 -> 36.44, >> 37 -> 7 fractional bits (33.7).
 *
 *   mult1  32-bit multiplicand
 *   mult2  64-bit multiplicand; bits 63..48 are ignored
 *
 * The lowest partial product (low half of mult1 times bits 15..0 of mult2)
 * is deliberately left out, and intermediate sums are truncated, so the
 * result is a truncated approximation of the exact shifted product.
 *
 * The 16-bit words are extracted with shifts and masks so the result does
 * not depend on the byte order of the target.
 */
INLINE U64 mult_428u_3216u_337u(U32 mult1,U64 mult2)
{
  const unsigned short m1_hi = (unsigned short)((mult1 >> 16) & 0xFFFF);
  const unsigned short m1_lo = (unsigned short)(mult1 & 0xFFFF);
  const unsigned short m2_w2 = (unsigned short)((mult2 >> 32) & 0xFFFF); /* bits 47..32 */
  const unsigned short m2_w1 = (unsigned short)((mult2 >> 16) & 0xFFFF); /* bits 31..16 */
  const unsigned short m2_w0 = (unsigned short)(mult2 & 0xFFFF);         /* bits 15..0  */
  U64 acc;

  /* weight 2^16 terms, reduced to weight 2^32 */
  acc  = (U64)m2_w0 * m1_hi;
  acc += (U64)m2_w1 * m1_lo;
  acc >>= 16;

  /* weight 2^32 terms, then reduce to weight 2^37 */
  acc += (U64)m2_w2 * m1_lo;
  acc += (U64)m2_w1 * m1_hi;
  acc >>= 5;

  /* weight 2^48 term brought to weight 2^37 */
  acc += ((U64)m2_w2 * m1_hi) << 11;

  return acc;
}

/*
 * Multiply a 32-bit value by the low 48 bits of a 64-bit value using
 * 16x16-bit partial products, with a net right shift of 35 bits.
 * Per the name: 6.26 * 32.16 -> 38.42, >> 35 -> 7 fractional bits (33.7).
 *
 *   mult1  32-bit multiplicand
 *   mult2  64-bit multiplicand; bits 63..48 are ignored
 *
 * The lowest partial product (low half of mult1 times bits 15..0 of mult2)
 * is deliberately left out, and intermediate sums are truncated, so the
 * result is a truncated approximation of the exact shifted product.
 *
 * The 16-bit words are extracted with shifts and masks so the result does
 * not depend on the byte order of the target.
 */
INLINE U64 mult_626u_3216u_337u(U32 mult1,U64 mult2)
{
  const unsigned short m1_hi = (unsigned short)((mult1 >> 16) & 0xFFFF);
  const unsigned short m1_lo = (unsigned short)(mult1 & 0xFFFF);
  const unsigned short m2_w2 = (unsigned short)((mult2 >> 32) & 0xFFFF); /* bits 47..32 */
  const unsigned short m2_w1 = (unsigned short)((mult2 >> 16) & 0xFFFF); /* bits 31..16 */
  const unsigned short m2_w0 = (unsigned short)(mult2 & 0xFFFF);         /* bits 15..0  */
  U64 acc;

  /* weight 2^16 terms, reduced to weight 2^32 */
  acc  = (U64)m2_w0 * m1_hi;
  acc += (U64)m2_w1 * m1_lo;
  acc >>= 16;

  /* weight 2^32 terms, then reduce to weight 2^35 */
  acc += (U64)m2_w2 * m1_lo;
  acc += (U64)m2_w1 * m1_hi;
  acc >>= 3;

  /* weight 2^48 term brought to weight 2^35 */
  acc += ((U64)m2_w2 * m1_hi) << 13;

  return acc;
}

//U64 mult_428u_3216u_346u(U32 mult1,U64 mult2)
/*
 * Multiply a 32-bit value by the low 48 bits of a 64-bit value using
 * 16x16-bit partial products, with a net right shift of 32 bits.
 * Per the name: 4.28 * 32.16 -> 36.44, >> 32 -> 12 fractional bits.
 *
 *   mult1  32-bit multiplicand
 *   mult2  64-bit multiplicand; bits 63..48 are ignored
 *
 * All six partial products are included; the low-order sums are truncated
 * as they are shifted down.
 *
 * The 16-bit words are extracted with shifts and masks so the result does
 * not depend on the byte order of the target.
 */
INLINE U64 mult_428u_3216u_2812u(U32 mult1,U64 mult2)
{
  const unsigned short m1_hi = (unsigned short)((mult1 >> 16) & 0xFFFF);
  const unsigned short m1_lo = (unsigned short)(mult1 & 0xFFFF);
  const unsigned short m2_w2 = (unsigned short)((mult2 >> 32) & 0xFFFF); /* bits 47..32 */
  const unsigned short m2_w1 = (unsigned short)((mult2 >> 16) & 0xFFFF); /* bits 31..16 */
  const unsigned short m2_w0 = (unsigned short)(mult2 & 0xFFFF);         /* bits 15..0  */
  U64 acc;

  /* weight 2^0 term, reduced to weight 2^16 */
  acc  = ((U64)m1_lo * m2_w0) >> 16;

  /* weight 2^16 terms, reduced to weight 2^32 */
  acc += (U64)m2_w0 * m1_hi;
  acc += (U64)m2_w1 * m1_lo;
  acc >>= 16;

  /* weight 2^32 terms */
  acc += (U64)m2_w2 * m1_lo;
  acc += (U64)m2_w1 * m1_hi;

  /* weight 2^48 term brought to weight 2^32 */
  acc += ((U64)m2_w2 * m1_hi) << 16;

  return acc;
}

/*
 * Multiply a 32-bit value by the low 48 bits of a 64-bit value using
 * 16x16-bit partial products, with a net right shift of 30 bits.
 * Per the name: 6.26 * 32.16 -> 38.42, >> 30 -> 12 fractional bits.
 * Used in computenoise.
 *
 *   mult1  32-bit multiplicand
 *   mult2  64-bit multiplicand; bits 63..48 are ignored
 *
 * All six partial products are included; the low-order sums are truncated
 * as they are shifted down.
 *
 * The 16-bit words are extracted with shifts and masks so the result does
 * not depend on the byte order of the target.
 */
INLINE U64 mult_626u_3216u_2812u(U32 mult1,U64 mult2)  //used in computenoise
{
  const unsigned short m1_hi = (unsigned short)((mult1 >> 16) & 0xFFFF);
  const unsigned short m1_lo = (unsigned short)(mult1 & 0xFFFF);
  const unsigned short m2_w2 = (unsigned short)((mult2 >> 32) & 0xFFFF); /* bits 47..32 */
  const unsigned short m2_w1 = (unsigned short)((mult2 >> 16) & 0xFFFF); /* bits 31..16 */
  const unsigned short m2_w0 = (unsigned short)(mult2 & 0xFFFF);         /* bits 15..0  */
  U64 acc, mid;

  /* weight 2^0 term, reduced to weight 2^16 */
  acc  = ((U64)m1_lo * m2_w0) >> 16;

  /* weight 2^16 terms, reduced to weight 2^30 */
  acc += (U64)m2_w0 * m1_hi;
  acc += (U64)m2_w1 * m1_lo;
  acc >>= 14;

  /* weight 2^32 terms brought to weight 2^30 */
  mid  = (U64)m2_w2 * m1_lo;
  mid += (U64)m2_w1 * m1_hi;
  acc += mid << 2;

  /* weight 2^48 term brought to weight 2^30 */
  acc += ((U64)m2_w2 * m1_hi) << 18;

  return acc;
}

/*
 * Multiply a 32-bit value by the low 48 bits of a 64-bit value using
 * 16x16-bit partial products, with a net right shift of 37 bits, and return
 * the low 32 bits of the result.
 * Per the name: 1.31 * 28.12 -> 29.43, >> 37 -> 6 fractional bits (26.6).
 * Used in computenoise.
 *
 *   mult1  32-bit multiplicand
 *   mult2  64-bit multiplicand; bits 63..48 are ignored
 *
 * All six partial products are included; the low-order sums are truncated
 * as they are shifted down, and the final value is truncated to U32.
 *
 * The 16-bit words are extracted with shifts and masks so the result does
 * not depend on the byte order of the target.
 */
INLINE U32 mult_131u_2812u_266u(U32 mult1,U64 mult2)  //used in computenoise
{
  const unsigned short m1_hi = (unsigned short)((mult1 >> 16) & 0xFFFF);
  const unsigned short m1_lo = (unsigned short)(mult1 & 0xFFFF);
  const unsigned short m2_w2 = (unsigned short)((mult2 >> 32) & 0xFFFF); /* bits 47..32 */
  const unsigned short m2_w1 = (unsigned short)((mult2 >> 16) & 0xFFFF); /* bits 31..16 */
  const unsigned short m2_w0 = (unsigned short)(mult2 & 0xFFFF);         /* bits 15..0  */
  U64 acc;

  /* weight 2^0 term, reduced to weight 2^16 */
  acc  = ((U64)m1_lo * m2_w0) >> 16;

  /* weight 2^16 terms, reduced to weight 2^32 */
  acc += (U64)m2_w0 * m1_hi;
  acc += (U64)m2_w1 * m1_lo;
  acc >>= 16;

  /* weight 2^32 terms, then reduce to weight 2^37 */
  acc += (U64)m2_w2 * m1_lo;
  acc += (U64)m2_w1 * m1_hi;
  acc >>= 5;

  /* weight 2^48 term brought to weight 2^37 */
  acc += ((U64)m2_w2 * m1_hi) << 11;

  return (U32)acc;        /* 26.6 result */
}
#endif

#endif /*_BASICOPS_H */


