//------------------------------------------------------------------------------
// (c) 01-2002 Gottfried Chen
//------------------------------------------------------------------------------

#ifndef FFT_ROUTINES_H
#define FFT_ROUTINES_H

//#include "water/complex.h"
//#include "mathlib/nmath.h"

// disable double to float conversion warning
#pragma warning(disable:4244)
//#pragma once

#include "water/complex.h"


// This file contains my FFT implementations. You shouldn't use them directly;
// use the Fft classes instead. That way you can take advantage of the
// FFTW library.

// Lookup table holding the powers of the complex <n>-th unit root.
// The table is a heap array owned by this object, so instances must not be
// copied (a copy would free the same array twice).
class UnitRoot
{
public:
    // Generates the powers 0 .. n-1 of the <n>-th unit root.
    UnitRoot(unsigned int n);

    // Frees the table of powers.
    ~UnitRoot();

    // Gets the power <p> of the unit root. <p> must be in [0, n-1], because
    // the table holds exactly <n> entries. Higher powers are periodic and have
    // to be reduced modulo <n> by the caller. If <p> is not within the valid
    // range the results are undefined.
    const nComplex& pow(unsigned int p) const;

    // Returns pow(1).
    operator const nComplex&() const;

private:
    // Copying is disabled: both operations are declared but intentionally
    // left undefined.
    UnitRoot(const UnitRoot&);
    UnitRoot& operator=(const UnitRoot&);

    nComplex* mPowers;  // Array of mN precomputed powers.
    unsigned int mN;    // Order of the unit root, i.e. number of table entries.
};


//------------------------------------------------------------------------------
// Allocates a table of <n> entries and fills entry k with
// (cos(2*PI*k/n), sin(2*PI*k/n)).
inline UnitRoot::UnitRoot(unsigned int n) :
//------------------------------------------------------------------------------
mN(n)
{
    mPowers = n_new (nComplex[n]);
    n_assert(mPowers);

    unsigned int k = 0;
    while (k < n)
    {
        // Angle of the k-th power; used for both the real and imaginary part.
        const float angle = (6.283185307179586476925286766559f)*k/(float)n;
        mPowers[k] = nComplex(n_cos(angle), n_sin(angle));
        ++k;
    }
}

//------------------------------------------------------------------------------
// Releases the table of powers held in mPowers.
inline UnitRoot::~UnitRoot()
//------------------------------------------------------------------------------
{
    n_delete_array(mPowers);
}

//------------------------------------------------------------------------------
// Plain table lookup of the <p>-th power. No range check is performed here,
// so <p> has to be a valid index into the table.
inline const nComplex& UnitRoot::pow(unsigned int p) const
//------------------------------------------------------------------------------
{
    const nComplex* const entry = mPowers + p;
    return *entry;
}

//------------------------------------------------------------------------------
// Conversion to the unit root itself, i.e. its first power.
inline UnitRoot::operator const nComplex&() const
//------------------------------------------------------------------------------
{
    // Same table entry that pow(1) returns.
    return mPowers[1];
}
  
// A collection of fast Fourier transform functions. All transform routines
// are static members.
//
// NOTE on the size parameters: despite their names, the implementations in
// this header use <sizePower>, <rowPower> and <columnPower> as the element
// counts themselves (loop bounds, unit root table size), not as exponents.
// Indices into the unit root table are reduced with a bit mask (size-1), so
// the counts have to be powers of two.
class FftRoutines
{
public:
    // Constructor declaration; no definition is visible in this header.
	FftRoutines();
    // This function does an in-place FFT in one dimension. I've implemented it
    // according to a paper by Paul Heckbert.
    // <data> contains <sizePower> elements (see the note on the class).
    // <stride> is the stride between two array elements.
    // <sign> is the sign of 2*PI*i in the fourier transformation.
    static void fft(nComplex* data, unsigned int sizePower, int sign = 1, unsigned int stride = 1);
    // In-place 2D FFT. <data> is assumed to be an array of rows with <rowPower>
    // rows and <columnPower> columns (see the note on the class).
    // <sign> is the sign of 2*PI*i in the fourier transformation.
    static void fft2D(nComplex* data, unsigned int rowPower, unsigned int columnPower, int sign = 1);


    // This routine calculates a DFT using ordinary sums. I've used it to verify
    // that my FFT routine is correct. <data> is only read; the output is
    // written to <result>. Both hold <sizePower> elements and are indexed with
    // the same <stride> between two array elements.
    // <sign> is the sign of 2*PI*i in the fourier transformation.
    static void dft(nComplex* data, nComplex* result, unsigned int sizePower,
             int sign = 1, unsigned int stride = 1);
    // 2D variant built on dft(). <rowPower> and <columnPower> are used as the
    // row and column counts; the output is written to <result>.
    // <sign> is the sign of 2*PI*i in the fourier transformation.
    static void dft2D(nComplex* data, nComplex* result,
               unsigned int rowPower, unsigned int columnPower, int sign = 1);


    // This routine is taken from "Numerical recipes in C" and does FFT in one
    // dimension. Data indices start at 1!! I didn't really understand how it
    // works and have used it only for verifying the results of my FFT routine.
    // (Currently disabled: the declaration below is commented out.)
//    static
//    void four1(float* data, unsigned int nn, int isign);

//    static
//    void unitTest();
//	inline static void inter(unsigned int i){FftRoutines::m_log=i;};

private:
    // Reverses the order of the lowest <limit> bits of <value>.
    // <value> = 00000110, <limit> = 3 returns 00000011.
    static unsigned int reflect(unsigned int value, unsigned int limit);

    // Reflect the indices of the elements stored in array <data> and
    // reorder the entries according to the reflected indices.
    static void reorder(nComplex* data, unsigned int sizePower, unsigned int stride = 1);
    // Exchange the values of <x> and <y>.
	static void swap(nComplex&x, nComplex&y);
	static void swap(float&x, float&y);
    // Base-2 logarithm (rounded down) of a positive size <j>.
	static unsigned int getlogSize(int j);
    // Not referenced by any routine in this header; the only uses visible here
    // are commented out.
	static unsigned int m_log;
};

#ifdef __VC__
#pragma optimize("agt", on)
#endif
// Returns floor(log2(j)) for a positive <j>, i.e. the index of its highest set
// bit (1 -> 0, 8 -> 3, 32 -> 5).
// Returns 0 for j <= 0. Without this guard the bit counter would be
// decremented from 0 and wrap around to the largest unsigned value, which
// callers would then use as a loop bound.
inline unsigned int FftRoutines::getlogSize(int j)
{
    if (j <= 0)
        return 0;

    // Count how often the value can be halved before it reaches zero.
    unsigned int logSize = 0;
    for (int rest = j >> 1; rest > 0; rest >>= 1)
        ++logSize;
    return logSize;
}

#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T> 
//------------------------------------------------------------------------------
// Mirrors the lowest <limit> bits of <value>: bit 0 becomes bit <limit>-1 and
// so on. Bits of <value> above <limit> are dropped.
inline unsigned int FftRoutines::reflect(unsigned int value, unsigned int limit)
//------------------------------------------------------------------------------
{
    unsigned int mirrored = 0;
    unsigned int remaining = limit;
    while (remaining != 0)
    {
        // Move the current lowest bit of <value> to the low end of the result.
        mirrored = (mirrored << 1) | (value & 1u);
        value >>= 1;
        --remaining;
    }
    return mirrored;
}

#ifdef __VC__
#pragma optimize("agt", on)
#endif
// Exchanges the values of <x> and <y> through a temporary copy.
inline void FftRoutines::swap(nComplex&x, nComplex&y)
{
    nComplex held = x;
    x = y;
    y = held;
}

#ifdef __VC__
#pragma optimize("agt", on)
#endif
// Exchanges the values of <x> and <y> through a temporary copy.
inline void FftRoutines::swap(float&x, float&y)
{
    float held = x;
    x = y;
    y = held;
}

#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T> 
//------------------------------------------------------------------------------
// Permutes the <sizePower> elements of <data> (spaced <stride> apart) so that
// each element ends up at the bit-reflected version of its index. The number
// of reflected bits is getlogSize(sizePower).
inline void FftRoutines::reorder(nComplex* data, unsigned int sizePower, unsigned int stride)
//------------------------------------------------------------------------------
{
    const unsigned int bitCount = getlogSize(sizePower);
    for (unsigned int index = 0; index < sizePower; ++index)
    {
        const unsigned int mirrored = reflect(index, bitCount);

        // Handle each pair only once (from its smaller index); indices that
        // map onto themselves need no swap at all.
        if (mirrored <= index)
            continue;

        swap(data[index*stride], data[mirrored*stride]);
    }
}

#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T>
//------------------------------------------------------------------------------
// DFT by direct summation: every output element is the sum over all input
// elements, each multiplied by the matching power of the unit root.
// <sizePower> is used as the element count; powers are reduced with the bit
// mask <sizePower>-1, which equals a modulo only for power-of-two counts.
inline void FftRoutines::dft(nComplex* data, nComplex* result, unsigned int sizePower,
                 int sign, unsigned int stride)
//------------------------------------------------------------------------------
{
    UnitRoot roots(sizePower);
    const unsigned int mask = sizePower-1;

    unsigned int outIdx = 0;
    while (outIdx < sizePower)
    {
        // Accumulate directly in the output slot.
        nComplex& sum = result[outIdx*stride];
        sum = 0.0f;

        // sign*outIdx is evaluated in unsigned arithmetic, so a negative sign
        // wraps around and is brought back into range by the mask below.
        const unsigned int step = sign*outIdx;
        for (unsigned int inIdx = 0; inIdx < sizePower; ++inIdx)
        {
            sum += roots.pow((step*inIdx)&mask)*data[inIdx*stride];
        }
        ++outIdx;
    }
}

#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T>
//------------------------------------------------------------------------------
// In-place FFT of <sizePower> elements of <data>, spaced <stride> apart.
// <sizePower> is used as the element count and has to be a power of two:
// the butterfly indices and the bit mask used for the unit root powers are
// only valid in that case.
// <sign> is the sign of 2*PI*i in the fourier transformation.
inline void FftRoutines::fft(nComplex* data, unsigned int sizePower, int sign,
                 unsigned int stride)
//------------------------------------------------------------------------------
{
    // Nothing to transform for fewer than two elements. Return before any
    // table is built and before a level count is derived from a size of zero.
    if (sizePower < 2)
        return;

    // Bring the input into bit-reflected order first.
    reorder(data, sizePower, stride);

    UnitRoot w(sizePower);
    const unsigned int mask = sizePower-1;
    const unsigned int logSize = getlogSize(sizePower);

    unsigned int bd(1);            // Butterfly delta
    unsigned int bs(2);            // Butterfly step
    unsigned int wm(sizePower>>1); // Unit root multiplier

    // One pass per level; there are log2(size) levels.
    for (unsigned int level = 0; level < logSize; ++level)
    {
        for (unsigned int i = 0; i<sizePower; i += bs)
        {
            for (unsigned int j = 0; j<bd; ++j)
            {
                // Current index and partner index in the butterfly.
                const unsigned int ci = stride*(i+j);
                const unsigned int ni = stride*(i+j+bd);

                // (j*wm)&(n-1) = (j*wm)%n if n is a power of 2
                nComplex temp = w.pow((sign*j*wm)&mask)*data[ni];

                data[ni] = data[ci] - temp;
                data[ci] += temp;
            }
        }
        // The next level combines blocks of twice the size.
        bd <<= 1;
        bs <<= 1;
        wm >>= 1;
    }
}

/*
#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T>
//------------------------------------------------------------------------------
inline void FftRoutines::four1(float* data, unsigned int nn, int isign)
//------------------------------------------------------------------------------
{
    unsigned long n, mmax, m, j, istep, i;
    double wtemp, wr, wpr, wpi, wi, theta;
    float tempr, tempi;

    n = nn<<1;
    j = 1;
    for (i = 1; i<n; i+=2)
    {
        if (j>i)
        {
            swap(data[j], data[i]);
            swap(data[j+1], data[i+1]);
        }
        m = n>>1;
        while(m>=2 && j>m)
        {
            j -= m;
            m>>=1;
        }
        j+=m;
    }

    mmax = 2;
    while (n>mmax)
    {
        istep = mmax<<1;
        theta = isign*(6.283185307179586476925286766559/mmax);
        wtemp = n_sin(0.5*theta);
        wpr = -2.0*wtemp*wtemp;
        wpi = n_sin(theta);
        wr = 1.0;
        wi = 0.0;
        for (m = 1; m<mmax; m += 2)
        {
            for(i = m; i<=n; i += istep)
            {
                j = i+mmax;
                tempr = wr*data[j] - wi*data[j+1];
                tempi = wr*data[j+1] + wi*data[j];
                data[j] = data[i] - tempr;
                data[j+1] = data[i+1] - tempi;
                data[i] += tempr;
                data[i+1] += tempi;
            }
            wr = (wtemp = wr)*wpr - wi*wpi + wr;
            wi = wi*wpr + wtemp*wpi + wi;
        }
        mmax = istep;
    }
}
*/
#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T>
//------------------------------------------------------------------------------
// 2D DFT using ordinary sums: a 1D DFT over every row, followed by a 1D DFT
// over every column of the row-transformed values.
// <data> holds <rowPower> rows of <columnPower> elements each and is only
// read; the output is written to <result>, which has the same layout.
// <sign> is the sign of 2*PI*i in the fourier transformation.
inline void FftRoutines::dft2D(nComplex* data, nComplex* result,
                   unsigned int rowPower, unsigned int columnPower,
                   int sign)
//------------------------------------------------------------------------------
{
    const unsigned int total = rowPower*columnPower;
    if (total == 0)
        return;

    // dft() reads every input element for every output element, so it cannot
    // work in place. The row pass therefore goes into an intermediate buffer,
    // and the column pass reads from that buffer. (Reading the column pass
    // from <data> again would throw the row pass away and leave only the
    // column-wise transform in <result>.)
    nComplex* rowPass = new nComplex[total];

    // Do DFT for all rows: data -> rowPass.
    nComplex* s = data;
    nComplex* d = rowPass;
    unsigned int i;
    for (i = 0; i<rowPower; ++i)
    {
        dft(s, d, columnPower, sign);
        s += columnPower;
        d += columnPower;
    }

    // Do DFT for all columns: rowPass -> result.
    s = rowPass;
    d = result;
    for (i = 0; i<columnPower; ++i)
    {
        dft(s, d, rowPower, sign, columnPower);
        ++s; ++d;
    }

    delete[] rowPass;
}
#ifdef __VC__
#pragma optimize("agt", on)
#endif
//template<class T>
//------------------------------------------------------------------------------
// In-place 2D FFT: a 1D FFT over every row, then a 1D FFT over every column.
// <rowPower> and <columnPower> are used as the row and column counts of the
// row-major array <data>.
inline void FftRoutines::fft2D(nComplex* data, unsigned int rowPower, unsigned int columnPower,
                   int sign)
//------------------------------------------------------------------------------
{
    // Row pass: consecutive rows start <columnPower> elements apart.
    nComplex* rowStart = data;
    for (unsigned int row = 0; row < rowPower; ++row, rowStart += columnPower)
    {
        fft(rowStart, columnPower, sign);
    }

    // Column pass: the elements of one column are <columnPower> apart, which
    // is passed on as the stride.
    nComplex* columnStart = data;
    for (unsigned int column = 0; column < columnPower; ++column, ++columnStart)
    {
        fft(columnStart, rowPower, sign, columnPower);
    }
}

//unsigned int FftRoutines::m_log=0;

//#include "water/FftRoutines.inl"
#endif // FFT_ROUTINES_H