Skip to content
Snippets Groups Projects
Commit 6a53453d authored by Milo Craun's avatar Milo Craun
Browse files

fixed merge

parents dff2d460 d3509e2a
No related branches found
No related tags found
No related merge requests found
......@@ -121,6 +121,11 @@ We need to go through and see what is going on first.
## Found the samples that could be vectorized on both x86 and Arm
Ran these simulations and put results in res folder.
# 2024-04-29 Davis and Milo
## Paper started
We have started the final paper and have decided to look into other benchmarks and CPU configurations.
Davis put a matrix multiply benchmark in the benchmarks folder.
# 2024-04-30 Milo
## Added image processing and results
I added 2 more image processing programs and sim results for them
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define num 2000 //matrix size
//#include <valgrind/callgrind.h>
//clock_t start, end;
//double cpu_time_used; //cannot use time.h in syscall emulation mode of GEM5. Must check the time in stats.txt
/*
 * Writes an N x N integer matrix to stdout, one bracketed row per line.
 * Every element is followed by a single space, so a row looks like "[1 2 ]".
 *
 * printer: array of N row pointers, each referring to at least N ints.
 * N:       number of rows and columns to print; nothing is printed if N <= 0.
 */
void print(int** printer, int N){
    int row = 0;
    while (row < N) {
        const int *cells = printer[row];

        printf("[");
        for (int col = 0; col < N; ++col) {
            printf("%d", cells[col]);
            printf(" ");
        }
        printf("]\n");
        ++row;
    }
}
/*
 * Accumulates the product of two N x N integer matrices into product_unopt
 * using i-k-j loop order: the innermost loop walks along one row of mat_B
 * and one row of product_unopt.
 *
 * The result is ADDED to whatever product_unopt already holds, so the caller
 * must zero it beforehand to obtain a plain product.
 *
 * mat_A, mat_B:   input matrices as arrays of N row pointers.
 * product_unopt:  output matrix as an array of N row pointers.
 * N:              dimension of all three matrices; no work is done if N <= 0.
 */
void matmul_unopt(int** mat_A , int** mat_B, int** product_unopt, int N)
{
    for (int row = 0; row < N; ++row) {
        int *out_row = product_unopt[row];
        int *a_row = mat_A[row];

        for (int inner = 0; inner < N; ++inner) {
            int *b_row = mat_B[inner];

            //#pragma GCC unroll 8
            for (int col = 0; col < N; ++col) {
                out_row[col] += a_row[inner] * b_row[col];
            }
        }
    }
}
/*
 * Accumulates the product of two N x N integer matrices into product_opt
 * using i-j-k loop order: for each output cell the innermost loop walks
 * along a row of mat_A and down a column of mat_B.
 *
 * The result is ADDED to whatever product_opt already holds, so the caller
 * must zero it beforehand to obtain a plain product.
 *
 * mat_A, mat_B: input matrices as arrays of N row pointers.
 * product_opt:  output matrix as an array of N row pointers.
 * N:            dimension of all three matrices; no work is done if N <= 0.
 */
void matmul_opt(int** mat_A , int** mat_B, int** product_opt, int N)
{
    for (int row = 0; row < N; ++row) {
        for (int col = 0; col < N; ++col) {
            int *cell = &product_opt[row][col];

            for (int inner = 0; inner < N; ++inner) {
                *cell += mat_A[row][inner] * mat_B[inner][col];
            }
        }
    }
}
/*
 * Checks that two N x N integer matrices are element-wise identical and
 * reports the outcome on stdout.
 *
 * The elements are ints, so they are compared exactly. Going through
 * fabsf()/fabs() would compare magnitudes only (5 and -5 would match) and
 * would round large values to float precision, hiding real differences.
 *
 * product_unopt, product_opt: matrices to compare, as arrays of N row pointers.
 * N:                          dimension of both matrices.
 *
 * Prints "The implementation is correct" when every element matches
 * (including the N <= 0 case, where there is nothing to compare), otherwise
 * prints "Optimized implementation is incorrect" at the first mismatch.
 */
void correctness_test(int** product_unopt, int** product_opt, int N){
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (product_unopt[i][j] != product_opt[i][j]) {
                /* Report the failure instead of returning silently. */
                printf("Optimized implementation is incorrect\n");
                return;
            }
        }
    }
    printf("The implementation is correct\n");
}
/*
 * Benchmark driver: builds two num x num integer matrices filled with
 * pseudo-random values, multiplies them with matmul_unopt() and matmul_opt(),
 * and checks that both products agree.
 *
 * The matrix size is fixed at compile time by the `num` macro; the command
 * line arguments are not used.
 *
 * Returns 0 on success, or EXIT_FAILURE if memory could not be allocated.
 */
int main (int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* Generated values lie in [min, max] and are truncated to int. */
    const float min = 1;
    const float max = 100;

    int status = EXIT_FAILURE;
    int rows_allocated = 0; /* number of leading rows that cleanup must free */

    printf("Generating matrices of size %d * %d \n",num,num);

    int **mat_A = malloc(num * sizeof *mat_A);
    int **mat_B = malloc(num * sizeof *mat_B);
    int **product_unopt = malloc(num * sizeof *product_unopt);
    int **product_opt = malloc(num * sizeof *product_opt);
    if (mat_A == NULL || mat_B == NULL || product_unopt == NULL || product_opt == NULL) {
        fprintf(stderr, "Failed to allocate matrix row tables\n");
        goto cleanup;
    }

    /* Rows of the four matrices are allocated interleaved, one index at a time. */
    for (int i = 0; i < num; i++) {
        mat_A[i] = malloc(num * sizeof *mat_A[i]);
        mat_B[i] = malloc(num * sizeof *mat_B[i]);
        product_unopt[i] = malloc(num * sizeof *product_unopt[i]);
        product_opt[i] = malloc(num * sizeof *product_opt[i]);
        /* All four slots of row i are now assigned (possibly NULL), so row i is safe to free. */
        rows_allocated = i + 1;
        if (mat_A[i] == NULL || mat_B[i] == NULL ||
            product_unopt[i] == NULL || product_opt[i] == NULL) {
            fprintf(stderr, "Failed to allocate matrix row %d\n", i);
            goto cleanup;
        }
    }

    /*
     * rand() is left unseeded: srand(time(NULL)) won't work in the SE
     * (syscall emulation) mode of GEM5. The products start at zero because
     * both matmul routines accumulate into their output.
     */
    for (int i = 0; i < num; i++) {
        for (int j = 0; j < num; j++) {
            mat_A[i][j] = (int)(((max - min) * ((float)rand() / RAND_MAX)) + min);
            mat_B[i][j] = (int)(((max - min) * ((float)rand() / RAND_MAX)) + min);
            product_unopt[i][j] = 0;
            product_opt[i][j] = 0;
        }
    }

    printf("computing the results\n");

    /*
     * TODO: add timers here to measure execution time. time.h cannot be used
     * in GEM5 syscall emulation mode; check the time in stats.txt instead.
     */
    matmul_unopt(mat_A, mat_B, product_unopt, num);
    matmul_opt(mat_A, mat_B, product_opt, num);

    correctness_test(product_unopt, product_opt, num);

    status = EXIT_SUCCESS;

cleanup:
    /* free(NULL) is a no-op, so partially built matrices are handled too. */
    for (int i = 0; i < rows_allocated; i++) {
        free(mat_A[i]);
        free(mat_B[i]);
        free(product_unopt[i]);
        free(product_opt[i]);
    }
    free(mat_A);
    free(mat_B);
    free(product_unopt);
    free(product_opt);
    return status;
}
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment