fixed merge

6a53453d · Milo Craun · dff2d460 · d3509e2a · 6a53453d · 6a53453d
Commit 6a53453d authored 10 months ago by Milo Craun
--- a/README.md
+++ b/README.md
@@ -121,6 +121,11 @@ We need to go through and see what is going on first.
 ## Found the samples that could be vectorized on both x86 and Arm
 Ran these simulations and put results in res folder.

+# 2024-04-29 Davis and Milo
+## Paper started
+We have started the final paper and have decided to look into other benchmarks and CPU configurations.
+Davis put a matrix multiply benchmark in the benchmarks folder.
+
 # 2024-04-30 Milo
 ## Added image processing and results
 I added 2 more image processing programs and sim results for them
--- a/benchmarks/matmul.c
+++ b/benchmarks/matmul.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#define num 2000 //matrix size
+//#include <valgrind/callgrind.h>
+//clock_t start, end;
+//double cpu_time_used; //cannot use time.h in syscall emulation mode of GEM5. Must check the time in stats.txt 
+
+
+/*
+ * Writes an N x N integer matrix to stdout, one bracketed row per line.
+ * Every element is followed by a single space, including the last one
+ * in a row.
+ */
+void print(int** printer, int N){
+  for (int row = 0; row < N; ++row) {
+    const int *cells = printer[row];
+
+    printf("[");
+    for (int col = 0; col < N; ++col) {
+      printf("%d", cells[col]);
+      printf(" ");
+    }
+    printf("]\n");
+  }
+}
+
+
+
+/*
+ * Accumulates the product mat_A * mat_B into product_unopt (all N x N,
+ * stored as arrays of row pointers) using the i-k-j loop order: for a fixed
+ * (row, inner) pair the innermost loop walks along one row of mat_B and one
+ * row of the output.
+ *
+ * The output is added to, not overwritten, so the caller must zero it first.
+ */
+void matmul_unopt(int**  mat_A , int** mat_B, int** product_unopt, int N)
+{
+    for (int row = 0; row < N; ++row) {
+        const int *lhs = mat_A[row];
+        int *out = product_unopt[row];
+
+        for (int inner = 0; inner < N; ++inner) {
+            const int *rhs = mat_B[inner];
+
+            //#pragma GCC unroll 8
+            for (int col = 0; col < N; ++col) {
+                out[col] += lhs[inner] * rhs[col];
+            }
+        }
+    }
+}
+ 
+/*
+ * Accumulates the product mat_A * mat_B into product_opt (all N x N, stored
+ * as arrays of row pointers) using the i-j-k loop order: each output element
+ * is finished before moving to the next, and the innermost loop reads
+ * mat_B[inner][col] from a different row on every step.
+ *
+ * The output is added to, not overwritten, so the caller must zero it first.
+ *
+ * NOTE(review): the name suggests a tuned kernel, but this is the plain
+ * i-j-k form -- confirm which of the two kernels is meant as the baseline.
+ */
+void matmul_opt(int** mat_A , int** mat_B, int** product_opt, int N)
+{
+    for (int row = 0; row < N; ++row) {
+        const int *lhs = mat_A[row];
+        int *out = product_opt[row];
+
+        for (int col = 0; col < N; ++col) {
+            for (int inner = 0; inner < N; ++inner) {
+                out[col] += lhs[inner] * mat_B[inner][col];
+            }
+        }
+    }
+}
+
+
+/*
+ * Checks that two N x N integer matrices hold identical values.
+ *
+ * Prints "The implementation is correct" when every pair of entries matches.
+ * On the first mismatch it prints "Optimized implementation is incorrect"
+ * and returns without examining the remaining entries.
+ *
+ * The entries are ints, so they are compared exactly and with their sign.
+ * Comparing magnitudes through fabs()/fabsf() would accept 5 versus -5, and
+ * the int -> float conversion can round away differences between large
+ * values.
+ */
+void correctness_test(int** product_unopt, int** product_opt, int N){
+      for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+          if (product_unopt[i][j] != product_opt[i][j]) {
+            /* Report the failure instead of returning silently. */
+            printf("Optimized implementation is incorrect\n");
+            return;
+          }
+        }
+      }
+      printf("The implementation is correct\n");
+}
+
+
+/* Allocates a table of n row pointers with every slot set to NULL, so the
+ * table can be handed to free_matrix() even when only partly filled.
+ * Returns NULL if the allocation fails. */
+static int **alloc_row_table(int n)
+{
+    int **table = malloc((size_t)n * sizeof *table);
+    if (table == NULL) {
+        return NULL;
+    }
+    for (int i = 0; i < n; i++) {
+        table[i] = NULL;
+    }
+    return table;
+}
+
+/* Frees every row of an n-row matrix and then the row table itself.
+ * Accepts a NULL table and NULL rows. */
+static void free_matrix(int **table, int n)
+{
+    if (table == NULL) {
+        return;
+    }
+    for (int i = 0; i < n; i++) {
+        free(table[i]);
+    }
+    free(table);
+}
+
+/*
+ * Benchmark driver: builds two num x num matrices of pseudo-random ints,
+ * multiplies them with matmul_unopt() and matmul_opt(), and compares the two
+ * results with correctness_test().
+ *
+ * The matrix size is the compile-time constant `num`; the command-line
+ * arguments are not used.
+ *
+ * Returns 0 on success, EXIT_FAILURE if any allocation fails.
+ */
+int main (int argc, char *argv[])
+{
+  (void)argc;
+  (void)argv;
+
+  /* Range of the generated values. rand() is left unseeded; the original
+   * authors' note says srand((unsigned int)time(NULL)) won't work in the SE
+   * mode of GEM5. */
+  const float min = 1;
+  const float max = 100;
+  int status = EXIT_FAILURE;
+
+  printf("Generating matrices of size %d * %d \n",num,num);
+
+  int **mat_A = alloc_row_table(num);
+  int **mat_B = alloc_row_table(num);
+  int **product_unopt = alloc_row_table(num);
+  int **product_opt = alloc_row_table(num);
+
+  if (mat_A == NULL || mat_B == NULL ||
+      product_unopt == NULL || product_opt == NULL) {
+    fprintf(stderr, "matmul: out of memory\n");
+    goto cleanup;
+  }
+
+  /* Rows are allocated interleaved across the four matrices (row i of each,
+   * then row i + 1 of each). */
+  for (int i = 0; i < num; i++) {
+    mat_A[i] = malloc(num * sizeof *mat_A[i]);
+    mat_B[i] = malloc(num * sizeof *mat_B[i]);
+    product_unopt[i] = malloc(num * sizeof *product_unopt[i]);
+    product_opt[i] = malloc(num * sizeof *product_opt[i]);
+
+    if (mat_A[i] == NULL || mat_B[i] == NULL ||
+        product_unopt[i] == NULL || product_opt[i] == NULL) {
+      fprintf(stderr, "matmul: out of memory\n");
+      goto cleanup;
+    }
+  }
+
+  /* Fill the inputs and clear both outputs. For each element, mat_A draws
+   * from rand() before mat_B; the float result is truncated on assignment
+   * to int. */
+  for (int i = 0; i < num; i++) {
+    for (int j = 0; j < num; j++) {
+      int putin = ((max - min) * ((float)rand() / RAND_MAX)) + min;
+
+      mat_A[i][j] = putin;
+      mat_B[i][j] = ((max - min) * ((float)rand() / RAND_MAX)) + min;
+      product_unopt[i][j] = 0;
+      product_opt[i][j] = 0;
+    }
+  }
+
+  printf("computing the results\n");
+
+  //TODO: add timers here to measure execution time
+  matmul_unopt(mat_A, mat_B, product_unopt, num);
+
+  //TODO: add timers here to measure execution time
+  matmul_opt(mat_A, mat_B, product_opt, num);
+
+  correctness_test(product_unopt, product_opt, num);
+
+  status = 0;
+
+cleanup:
+  /* All four matrices are released, including product_opt. */
+  free_matrix(mat_A, num);
+  free_matrix(mat_B, num);
+  free_matrix(product_unopt, num);
+  free_matrix(product_opt, num);
+  return status;
+}
--- a/benchmarks/matmul_exe
+++ b/benchmarks/matmul_exe