#include #include #include #include #include #include #include #include #include #define imageSize 512 #define slabSize 128 // imageSize / 4; int threads; fftw_plan plan; double DataR[imageSize][imageSize][imageSize]; double DataC[imageSize][imageSize][imageSize]; double COSINE[imageSize], SINE[imageSize]; /***********************************************************/ /* OBJECT GENERATING FUNCTIONS. */ /***********************************************************/ /****************************************/ /* Make a 3D cube object. */ /* Do not modify - RAM. */ /****************************************/ void MAKE_CUBE(int size) { int i, j, k; int start = (imageSize/2)-(size / 2); int stop = (imageSize/2)+(size / 2); // Clear array. for (j = 0; j < imageSize; j++) { for (i = 0; i < imageSize; i++) { for (k = 0; k < imageSize; k++) { DataR[j][i][k] = 0.0; DataC[j][i][k] = 0.0; } } } // Build cube. for (j = start; j < stop; j++) { for (i = start; i < stop; i++) { for (k = start; k < stop; k++) { DataR[j][i][k] = 255.0; } } } } /****************************************************************/ /* Fourier Transform subroutines. */ /****************************************************************/ /****************************************/ /* FFTW_THREE_D_FFT */ /* - 3D Fast Fourier transform using */ /* 3D FFTWs. */ /* - Flag specifies if a forward FFT */ /* (1) or an inverse FFT (IFFT) (-1) */ /* is calculated. */ /* Do not modify - RAM. */ /****************************************/ void FFTW_THREE_D_FFT_3(double Flag) { int i, j, k, z; fftw_complex *in, *out; // Enable threads. fftw_init_threads(); fftw_plan_with_nthreads(threads); // Allocate local space. in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imageSize * imageSize * imageSize); out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imageSize * imageSize * imageSize); // Define FFT "plan". if (Flag == 1) { plan = fftw_plan_dft_3d(imageSize, imageSize, imageSize, in, out, FFTW_FORWARD, FFTW_ESTIMATE); } else { plan = fftw_plan_dft_3d(imageSize, imageSize, imageSize, in, out, FFTW_BACKWARD, FFTW_ESTIMATE); } // Populate "in" array. z = 0; for (j = 0; j < imageSize; j++) { for (i = 0 ; i < imageSize; i++) { for (k = 0; k < imageSize; k++) { in[z] = DataR[j][i][k] + _Complex_I * DataC[j][i][k]; z++; } } } fftw_execute(plan); // Extract data from "out" array. z = 0; for (j = 0; j < imageSize; j++) { for (i = 0 ; i < imageSize; i++) { for (k = 0; k < imageSize; k++) { DataR[j][i][k] = creal(out[z]); DataC[j][i][k] = cimag(out[z]); z++; } } } // Free local space. fftw_free(in); fftw_free(out); fftw_destroy_plan(plan); fftw_cleanup_threads(); } /****************************************/ /* SLAB_FFT */ /* - Generates a 2D FFT using MPI code. */ /****************************************/ void SLAB_FFT(double Flag) { int i, j, k, z, s; fftw_complex *in, *out; // Enable threads. fftw_init_threads(); fftw_plan_with_nthreads(threads); // Allocate local space. in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imageSize * imageSize); out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imageSize * imageSize); // Define FFT "plan". if (Flag == 1) { plan = fftw_plan_dft_2d(imageSize, imageSize, in, out, FFTW_FORWARD, FFTW_ESTIMATE); } else { plan = fftw_plan_dft_2d(imageSize, imageSize, in, out, FFTW_BACKWARD, FFTW_ESTIMATE); } // Loop over all slices. for (s = 0; s < slabSize; s++) { // Populate "in" array. z = 0; for (j = 0; j < imageSize; j++) { for (i = 0; i < imageSize; i++) { in[z] = DataR[j][i][s] + _Complex_I * DataC[j][i][s]; z++; } } fftw_execute(plan); // Extract data from "out" array. z = 0; for (j = 0; j < imageSize; j++) { for (i = 0; i < imageSize; i++) { DataR[j][i][s] = creal(out[z]); DataC[j][i][s] = cimag(out[z]); z++; } } } // Free local space. fftw_free(in); fftw_free(out); fftw_destroy_plan(plan); fftw_cleanup_threads(); } /****************************************/ /* SLICE_FFT */ /* - Generates a 2D FFT using MPI code. */ /****************************************/ void *SLICE_THREAD(void *t) { int i, j, k, z, s; int slizeSize = (slabSize/threads); fftw_complex *in, *out; sleep(5); /* // Allocate local space. in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imageSize * imageSize); out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imageSize * imageSize); // Loop over all local slices. for (s = 0; s < slizeSize; s++) { // Populate "in" array. z = 0; for (j = 0; j < imageSize; j++) { for (i = 0; i < imageSize; i++) { in[z] = DataR[j][i][s] + _Complex_I * DataC[j][i][s]; z++; } } fftw_execute(plan); // Extract data from "out" array. z = 0; for (j = 0; j < imageSize; j++) { for (i = 0; i < imageSize; i++) { DataR[j][i][s] = creal(out[z]); DataC[j][i][s] = cimag(out[z]); z++; } } } // Free local space. fftw_free(in); fftw_free(out); */ pthread_exit((void*) t); } void SLICE_FFT(double Flag) { int t; void *status; pthread_t FFT_THREAD[16]; pthread_attr_t attr; fftw_complex *in, *out; // Define FFT "plan". if (Flag == 1) { plan = fftw_plan_dft_2d(imageSize, imageSize, in, out, FFTW_FORWARD, FFTW_ESTIMATE); } else { plan = fftw_plan_dft_2d(imageSize, imageSize, in, out, FFTW_BACKWARD, FFTW_ESTIMATE); } // Initialize and set thread detached attribute. pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); // Loop over all slices. for (t = 0; t < threads; t++) { pthread_create(&FFT_THREAD[t], &attr, SLICE_THREAD, (void *)t); printf("Thread %d started.\n", t); } for (t = 0; t < threads; t++) { pthread_join(FFT_THREAD[t], &status); printf("Thread %d returned.\n", status); } fftw_destroy_plan(plan); } /***********************************************************/ /* MAIN. */ /* Do not modify - RAM. */ /***********************************************************/ int main(void) { time_t startTime, endTime; // Make data set. MAKE_CUBE(32); // Benchmark with threaded 3d FFTW. startTime = time(NULL); threads = 1; SLICE_FFT(1.0); endTime = time(NULL); printf("FFT runtime: %lf (sec)\n", difftime(endTime, startTime)); startTime = time(NULL); threads = 2; SLICE_FFT(1.0); endTime = time(NULL); printf("FFT runtime: %lf (sec)\n", difftime(endTime, startTime)); startTime = time(NULL); threads = 4; SLICE_FFT(1.0); endTime = time(NULL); printf("FFT runtime: %lf (sec)\n", difftime(endTime, startTime)); startTime = time(NULL); threads = 8; SLICE_FFT(1.0); endTime = time(NULL); printf("FFT runtime: %lf (sec)\n", difftime(endTime, startTime)); startTime = time(NULL); threads = 16; SLICE_FFT(1.0); endTime = time(NULL); printf("FFT runtime: %lf (sec)\n", difftime(endTime, startTime)); return 1; }