190 likes | 387 Views
Chapter 7 - Performance. Understanding the PIC32 Clock and learning to optimize the memory interface for maximum performance. PIC32MX3xx Clock Module. The OSCCON register. Input Divider. PLL. Output Divider. System Clock. 8 MHz. 1 : 2. 4 MHz. 1 x 18. 72 MHz. 1 : 1. 72 MHz.
E N D
Chapter 7 - Performance Understanding the PIC32 Clock and learning to optimize the memory interface for maximum performance
Input Divider PLL Output Divider System Clock 8 MHz 1 : 2 4 MHz 1 x 18 72 MHz 1 : 1 72 MHz Primary Oscillator Clock Chain
PIC32MX3xx Configuration Settings #pragma config POSCMOD=XT, FNOSC=PRIPLL #pragma config FPLLIDIV=DIV_2, FPLLMUL=MUL_18, FPLLODIV=DIV_1 #pragma config FPBDIV=DIV_2, FWDTEN=OFF, CP=OFF, BWP=OFF NOTE: All PIC32MX devices are currently released to operate at frequencies up to 80 MHz. FPLLMUL = MUL_20 gives the required multiplier.
Fast Fourier Transform // input vector unsigned char inB[N_FFT]; // input complex vector float xr[N_FFT]; float xi[N_FFT]; // Fast Fourier Transformation void FFT( void) { int m, k, i, j; float a,b,c,d,wwr,wwi,pr,pi; // FFT loop m = N_FFT/2; j = 0; while( m > 0) { /* log(N) cycle */ k = 0; while( k < N_FFT) { // batterflies loop for( i=0; i<m; i++) { // batterfly a = xr[i+k]; b = xi[i+k]; c = xr[i+k+m]; d = xi[i+k+m]; wwr = wr[i<<j]; wwi = wi[i<<j]; pr = a-c; pi = b-d; xr[i+k] = a + c; xi[i+k] = b + d; xr[i+k+m] = pr * wwr - pi * wwi; xi[i+k+m] = pr * wwi + pi * wwr; } // for i k += m<<1 ; } // while k m >>= 1; j++; } // while m } // FFT
Windowing // apply Hann window to input vector void windowFFT( unsigned char *s) { int i; float *xrp, *xip, *wwp; // apply window to input signal xrp=xr; xip=xi; wwp=ww; for( i=0; i<N_FFT; i++) { *xrp++ = (*s++ - 128) * (*wwp++); *xip++ = 0; } // for i } // windowFFT
Computing Power and Scaling void powerScale( unsigned char *r) { int i, j; float t, max; float xrp, xip; // compute signal power (in place) and find maximum max = 0; for( i=0; i<N_FFT/2; i++) { j = rev[ i]; xrp = xr[ j]; xip = xi[ j]; t = xrp*xrp + xip*xip; xr[ j]=t; if ( t > max) max = t; } // bit reversal, scaling of output vector as unsigned char max = 255.0/max; for( i=0; i<N_FFT/2; i++) { t = xr[ rev[i]] * max; *r++ = t; } } // powerScale
Initialization // input vector unsigned char inB[N_FFT]; volatile int inCount; // rotation vectors float wr[N_FFT/2]; float wi[N_FFT/2]; // bit reversal vector short rev[N_FFT/2]; // window float ww[N_FFT]; void initFFT( void) { int i, m, t, k; float *wwp; for(i=0; i<N_FFT/2; i++) { // rotations wr[i] = cos(PI2N * i); wi[i] = sin(PI2N * i); // bit reversal t = i; m = 0; k = N_FFT-1; while (k>0) { m = (m << 1) + (t & 1); t = t >> 1; k = k >> 1; } rev[i] = m; } // for i // initialize Hanning window vector for( wwp=ww, i=0; i<N_FFT; i++) *wwp++ = 0.5 - 0.5 * cos(PI2N * i); } // initFFT
“FFT.h” /* ** FFT.h ** ** power of two optimized algorithm */ #include <math.h> #define N_FFT 256 // samples must be power of 2 #define PI2N 2 * M_PI / N_FFT extern unsigned char inB[]; extern volatile int inCount; // preparation of the rotation vectors void initFFT( void); // input window void windowFFT( unsigned char *source); // fast fourier transform void FFT( void); // compute power and scale output void powerScale( unsigned char *dest);
The Running Project /* ** Run.c ** */ // configuration bit settings #pragma config POSCMOD=XT, FNOSC=PRIPLL #pragma config FPLLIDIV=DIV_2, FPLLMUL=MUL_18, FPLLODIV=DIV_1 #pragma config FWDTEN=OFF, CP=OFF, BWP=OFF #include <p32xxxx.h> #include <plib.h> #include "fft.h“ main() { int i, t; double f; // 1. initializations initFFT(); // test sinusoid for (i=0; i<N_FFT; i++) { f = sin(2 * PI2N * i ); inB[ i] = 128 + ( unsigned char) (120.0 * f); } // for // 2. perform FFT algorithm windowFFT( inB); FFT(); powerScale( inB); // 3. infinite loop while( 1); } // main
Capturing Time // init 32-bit timer4/5 OpenTimer45( T4_ON | T4_SOURCE_INT, 0); // clear the 32-bit timer count WriteTimer45( 0); // insert FFT function calls here // read the timer count t = ReadTimer45();
Optimizing WaitStates SYSTEMConfigWaitStatesAndPB(72000000L);
Turning the Cache ON SYSTEMConfigWaitStatesAndPB(72000000L); CheKseg0CacheOn();
Enabling Pre-Fetch SYSTEMConfigWaitStatesAndPB(72000000L); CheKseg0CacheOn(); mCheConfigure( CHECON | 0x30);
Remove RAM Wait States // configure PB frequency and the number of wait states SYSTEMConfigWaitStatesAndPB(72000000L); // enable the cache for max performance CheKseg0CacheOn(); // enable instruction prefetch mCheConfigure( CHECON | 0x30); // disable RAM wait states mBMXDisableDRMWaitState(); SYSTEMConfigPerformance( 72000000L);