Updated Python script and added benchmark data

This commit is contained in:
uzy lol 2024-10-24 19:18:27 -07:00
parent 5eb70b128b
commit 324ace2671
3 changed files with 40 additions and 41 deletions

7
benchmark_data.csv Normal file
View File

@ -0,0 +1,7 @@
Problem Size,sum_direct,sum_indirect,sum_vector
8388608,0.001196,0.011989,0.003333
16777216,0.002403,0.065814,0.006458
33554432,0.00488,0.166686,0.012485
67108864,0.00964,0.370731,0.024519
134217728,0.019126,0.688523,0.047842
268435456,0.038392,4.173086,0.0921
1 Problem Size sum_direct sum_indirect sum_vector
2 8388608 0.001196 0.011989 0.003333
3 16777216 0.002403 0.065814 0.006458
4 33554432 0.00488 0.166686 0.012485
5 67108864 0.00964 0.370731 0.024519
6 134217728 0.019126 0.688523 0.047842
7 268435456 0.038392 4.173086 0.0921

View File

@ -19,50 +19,42 @@ Assumptions: developed and tested using Python version 3.8.8 on macOS 11.6
import pandas as pd
import matplotlib.pyplot as plt
fname = "sample_data_3vars.csv"
# Read the CSV file
fname = "benchmark_data.csv"
df = pd.read_csv(fname, comment="#")
print(df)
var_names = list(df.columns)
# Extract columns
problem_sizes = df['Problem Size'].values.tolist()
mflops = df['MFLOP/s'].values.tolist()
memory_bandwidth = df['Memory Bandwidth Utilization (%)'].values.tolist()
memory_latency = df['Memory Latency'].values.tolist()
print("var names =", var_names)
# Plot MFLOP/s
plt.figure()
plt.plot(problem_sizes, mflops, label='MFLOP/s')
plt.title('Problem Size vs. MFLOP/s')
plt.xlabel('Problem Size')
plt.ylabel('MFLOP/s')
plt.legend()
plt.savefig('mflops_plot.png')
# split the df into individual vars
# assumption: column order - 0=problem size, 1=code 1 time, 2=code 2 time, 3=code 3 time
# Plot Memory Bandwidth Utilization
plt.figure()
plt.plot(problem_sizes, memory_bandwidth, label='Memory Bandwidth Utilization (%)')
plt.title('Problem Size vs. Memory Bandwidth Utilization')
plt.xlabel('Problem Size')
plt.ylabel('Memory Bandwidth Utilization (%)')
plt.legend()
plt.savefig('memory_bandwidth_plot.png')
problem_sizes = df[var_names[0]].values.tolist()
code1_time = df[var_names[1]].values.tolist()
code2_time = df[var_names[2]].values.tolist()
code3_time = df[var_names[3]].values.tolist()
plt.title("Comparison of 3 Codes")
xlocs = [i for i in range(len(problem_sizes))]
plt.xticks(xlocs, problem_sizes)
# here, we are plotting the raw values read from the input .csv file, which
# we interpret as being "time" that maps directly to the y-axis.
#
# what if we want to plot MFLOPS instead? How do we compute MFLOPS from
# time and problem size? You may need to add some code here to compute
# MFLOPS, then modify the plt.plot() lines below to plot MFLOPS rather than time.
plt.plot(code1_time, "r-o")
plt.plot(code2_time, "b-x")
plt.plot(code3_time, "g-^")
#plt.xscale("log")
#plt.yscale("log")
plt.xlabel("Problem Sizes")
plt.ylabel("runtime")
varNames = [var_names[1], var_names[2], var_names[3]]
plt.legend(varNames, loc="best")
plt.grid(axis='both')
# Plot Memory Latency
plt.figure()
plt.plot(problem_sizes, memory_latency, label='Memory Latency')
plt.title('Problem Size vs. Memory Latency')
plt.xlabel('Problem Size')
plt.ylabel('Memory Latency')
plt.legend()
plt.savefig('memory_latency_plot.png')
plt.show()

View File

@ -21,7 +21,7 @@ import matplotlib.pyplot as plt
plot_fname = "myplot.png"
fname = "sample_data_3vars.csv"
fname = "benchmark_data.csv"
df = pd.read_csv(fname, comment="#")
print(df)