Updated python script and added benchmark data

2024-10-24 19:18:27 -07:00 · 2024-10-24 19:18:27 -07:00 · 324ace2671
commit 324ace2671
parent 5eb70b128b
3 changed files with 40 additions and 41 deletions
--- a/benchmark_data.csv
+++ b/benchmark_data.csv
@@ -0,0 +1,7 @@
+Problem Size,sum_direct,sum_indirect,sum_vector
+8388608,0.001196,0.011989,0.003333
+16777216,0.002403,0.065814,0.006458
+33554432,0.00488,0.166686,0.012485
+67108864,0.00964,0.370731,0.024519
+134217728,0.019126,0.688523,0.047842
+268435456,0.038392,4.173086,0.0921
--- a/plot_3vars.py
+++ b/plot_3vars.py
@@ -19,50 +19,42 @@ Assumptions: developed and tested using Python version 3.8.8 on macOS 11.6
 import pandas as pd
 import matplotlib.pyplot as plt

-
-fname = "sample_data_3vars.csv"
+# Read the CSV file
+fname = "benchmark_data.csv"
 df = pd.read_csv(fname, comment="#")
-print(df)

-var_names = list(df.columns)
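+# Extract columns by header name. NOTE(review): benchmark_data.csv as added in this commit only has 'Problem Size', 'sum_direct', 'sum_indirect', 'sum_vector', so the three metric lookups below will raise KeyError unless the CSV gains those columns or they are computed from the timings.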
+problem_sizes = df['Problem Size'].values.tolist()
+mflops = df['MFLOP/s'].values.tolist()
+memory_bandwidth = df['Memory Bandwidth Utilization (%)'].values.tolist()
+memory_latency = df['Memory Latency'].values.tolist()

-print("var names =", var_names)
+# Plot MFLOP/s
+plt.figure()
+plt.plot(problem_sizes, mflops, label='MFLOP/s')
+plt.title('Problem Size vs. MFLOP/s')
+plt.xlabel('Problem Size')
+plt.ylabel('MFLOP/s')
+plt.legend()
+plt.savefig('mflops_plot.png')

-# split the df into individual vars
-# assumption: column order - 0=problem size, 1=blas time, 2=basic time
+# Plot Memory Bandwidth Utilization
+plt.figure()
+plt.plot(problem_sizes, memory_bandwidth, label='Memory Bandwidth Utilization (%)')
+plt.title('Problem Size vs. Memory Bandwidth Utilization')
+plt.xlabel('Problem Size')
+plt.ylabel('Memory Bandwidth Utilization (%)')
+plt.legend()
+plt.savefig('memory_bandwidth_plot.png')

-problem_sizes = df[var_names[0]].values.tolist()
-code1_time = df[var_names[1]].values.tolist()
-code2_time = df[var_names[2]].values.tolist()
-code3_time = df[var_names[3]].values.tolist()
-
-plt.title("Comparison of 3 Codes")
-
-xlocs = [i for i in range(len(problem_sizes))]
-
-plt.xticks(xlocs, problem_sizes)
-
-# here, we are plotting the raw values read from the input .csv file, which
-# we interpret as being "time" that maps directly to the y-axis.
-#
-# what if we want to plot MFLOPS instead? How do we compute MFLOPS from
-# time and problem size? You may need to add some code here to compute
-# MFLOPS, then modify the plt.plot() lines below to plot MFLOPS rather than time.
-
-plt.plot(code1_time, "r-o")
-plt.plot(code2_time, "b-x")
-plt.plot(code3_time, "g-^")
-
-#plt.xscale("log")
-#plt.yscale("log")
-
-plt.xlabel("Problem Sizes")
-plt.ylabel("runtime")
-
-varNames = [var_names[1], var_names[2], var_names[3]]
-plt.legend(varNames, loc="best")
-
-plt.grid(axis='both')
+# Plot Memory Latency
+plt.figure()
+plt.plot(problem_sizes, memory_latency, label='Memory Latency')
+plt.title('Problem Size vs. Memory Latency')
+plt.xlabel('Problem Size')
+plt.ylabel('Memory Latency')
+plt.legend()
+plt.savefig('memory_latency_plot.png')

 plt.show()

--- a/plot_3vars_savefig.py
+++ b/plot_3vars_savefig.py
@@ -21,7 +21,7 @@ import matplotlib.pyplot as plt

 plot_fname = "myplot.png"

-fname = "sample_data_3vars.csv"
+fname = "benchmark_data.csv"
 df = pd.read_csv(fname, comment="#")
 print(df)