From 324ace2671a8c2e561c04db1decd31a64de49f88 Mon Sep 17 00:00:00 2001
From: Uzair Mohammed <uzair.hamed@gmail.com>
Date: Thu, 24 Oct 2024 19:18:27 -0700
Subject: [PATCH] Update Python plotting scripts and add benchmark data

---
 benchmark_data.csv    |  7 +++++
 plot_3vars.py         | 72 +++++++++++++++++++------------------------
 plot_3vars_savefig.py |  2 +-
 3 files changed, 40 insertions(+), 41 deletions(-)
 create mode 100644 benchmark_data.csv

diff --git a/benchmark_data.csv b/benchmark_data.csv
new file mode 100644
index 0000000..72c7ee3
--- /dev/null
+++ b/benchmark_data.csv
@@ -0,0 +1,7 @@
+Problem Size,sum_direct,sum_indirect,sum_vector
+8388608,0.001196,0.011989,0.003333
+16777216,0.002403,0.065814,0.006458
+33554432,0.00488,0.166686,0.012485
+67108864,0.00964,0.370731,0.024519
+134217728,0.019126,0.688523,0.047842
+268435456,0.038392,4.173086,0.0921
\ No newline at end of file
diff --git a/plot_3vars.py b/plot_3vars.py
index 96d5892..e789402 100644
--- a/plot_3vars.py
+++ b/plot_3vars.py
@@ -19,51 +19,43 @@ Assumptions: developed and tested using Python version 3.8.8 on macOS 11.6
 import pandas as pd
 import matplotlib.pyplot as plt
 
-
-fname = "sample_data_3vars.csv"
+# Benchmark results: the first column is the problem size and every other
+# column holds the measurements for one code (e.g. sum_direct).
+fname = "benchmark_data.csv"
 df = pd.read_csv(fname, comment="#")
-print(df)
 
-var_names = list(df.columns)
+# Take the column names from the file itself: benchmark_data.csv has no
+# 'MFLOP/s', 'Memory Bandwidth Utilization (%)' or 'Memory Latency' column,
+# so looking those names up raised KeyError before any plot was drawn.
+size_col, *code_cols = df.columns
+problem_sizes = df[size_col]
 
-print("var names =", var_names)
+
+def plot_metric(metric, ylabel, out_png):
+    """Plot each column of `metric` against problem size and save to out_png."""
+    plt.figure()
+    for code in metric.columns:
+        plt.plot(problem_sizes, metric[code], marker="o", label=code)
+    plt.title(f"Problem Size vs. {ylabel}")
+    plt.xlabel("Problem Size")
+    plt.ylabel(ylabel)
+    plt.legend(loc="best")
+    plt.grid(axis="both")
+    plt.savefig(out_png)
+
 
-# split the df into individual vars
-# assumption: column order - 0=problem size, 1=blas time, 2=basic time
+# Values exactly as recorded in the CSV, one line per code.
+runtime = df[code_cols]
+plot_metric(runtime, "runtime", "runtime_plot.png")
 
-problem_sizes = df[var_names[0]].values.tolist()
-code1_time = df[var_names[1]].values.tolist()
-code2_time = df[var_names[2]].values.tolist()
-code3_time = df[var_names[3]].values.tolist()
-
-plt.title("Comparison of 3 Codes")
-
-xlocs = [i for i in range(len(problem_sizes))]
-
-plt.xticks(xlocs, problem_sizes)
-
-# here, we are plotting the raw values read from the input .csv file, which
-# we interpret as being "time" that maps directly to the y-axis.
-#
-# what if we want to plot MFLOPS instead? How do we compute MFLOPS from
-# time and problem size? You may need to add some code here to compute
-# MFLOPS, then modify the plt.plot() lines below to plot MFLOPS rather than time.
-
-plt.plot(code1_time, "r-o")
-plt.plot(code2_time, "b-x")
-plt.plot(code3_time, "g-^")
-
-#plt.xscale("log")
-#plt.yscale("log")
-
-plt.xlabel("Problem Sizes")
-plt.ylabel("runtime")
-
-varNames = [var_names[1], var_names[2], var_names[3]]
-plt.legend(varNames, loc="best")
-
-plt.grid(axis='both')
+# MFLOP/s = operations / seconds / 1e6.
+# ASSUMPTION (TODO confirm): each code performs one operation per element of
+# the problem size, and the CSV values are elapsed seconds.
+mflops = pd.DataFrame({c: problem_sizes / df[c] / 1e6 for c in code_cols})
+plot_metric(mflops, "MFLOP/s", "mflops_plot.png")
+# NOTE: memory bandwidth utilization and memory latency are not plotted;
+# benchmark_data.csv has no columns for them.
 
 plt.show()
 
-# EOF
+# EOF
\ No newline at end of file
diff --git a/plot_3vars_savefig.py b/plot_3vars_savefig.py
index 82616ec..17f0c6a 100644
--- a/plot_3vars_savefig.py
+++ b/plot_3vars_savefig.py
@@ -21,7 +21,7 @@ import matplotlib.pyplot as plt
 
 plot_fname = "myplot.png"
 
-fname = "sample_data_3vars.csv"
+fname = "benchmark_data.csv"
 df = pd.read_csv(fname, comment="#")
 print(df)