From 324ace2671a8c2e561c04db1decd31a64de49f88 Mon Sep 17 00:00:00 2001 From: Uzair Mohammed Date: Thu, 24 Oct 2024 19:18:27 -0700 Subject: [PATCH] Updated python script and added benchmark data --- benchmark_data.csv | 7 +++++ plot_3vars.py | 72 +++++++++++++++++++------------------------ plot_3vars_savefig.py | 2 +- 3 files changed, 40 insertions(+), 41 deletions(-) create mode 100644 benchmark_data.csv diff --git a/benchmark_data.csv b/benchmark_data.csv new file mode 100644 index 0000000..72c7ee3 --- /dev/null +++ b/benchmark_data.csv @@ -0,0 +1,7 @@ +Problem Size,sum_direct,sum_indirect,sum_vector +8388608,0.001196,0.011989,0.003333 +16777216,0.002403,0.065814,0.006458 +33554432,0.00488,0.166686,0.012485 +67108864,0.00964,0.370731,0.024519 +134217728,0.019126,0.688523,0.047842 +268435456,0.038392,4.173086,0.0921 \ No newline at end of file diff --git a/plot_3vars.py b/plot_3vars.py index 96d5892..e789402 100644 --- a/plot_3vars.py +++ b/plot_3vars.py @@ -19,51 +19,43 @@ Assumptions: developed and tested using Python version 3.8.8 on macOS 11.6 import pandas as pd import matplotlib.pyplot as plt - -fname = "sample_data_3vars.csv" +# Read the CSV file +fname = "benchmark_data.csv" df = pd.read_csv(fname, comment="#") -print(df) -var_names = list(df.columns) +# Extract columns +problem_sizes = df['Problem Size'].values.tolist() +mflops = df['MFLOP/s'].values.tolist() +memory_bandwidth = df['Memory Bandwidth Utilization (%)'].values.tolist() +memory_latency = df['Memory Latency'].values.tolist() -print("var names =", var_names) +# Plot MFLOP/s +plt.figure() +plt.plot(problem_sizes, mflops, label='MFLOP/s') +plt.title('Problem Size vs. MFLOP/s') +plt.xlabel('Problem Size') +plt.ylabel('MFLOP/s') +plt.legend() +plt.savefig('mflops_plot.png') -# split the df into individual vars -# assumption: column order - 0=problem size, 1=blas time, 2=basic time +# Plot Memory Bandwidth Utilization +plt.figure() +plt.plot(problem_sizes, memory_bandwidth, label='Memory Bandwidth Utilization (%)') +plt.title('Problem Size vs. Memory Bandwidth Utilization') +plt.xlabel('Problem Size') +plt.ylabel('Memory Bandwidth Utilization (%)') +plt.legend() +plt.savefig('memory_bandwidth_plot.png') -problem_sizes = df[var_names[0]].values.tolist() -code1_time = df[var_names[1]].values.tolist() -code2_time = df[var_names[2]].values.tolist() -code3_time = df[var_names[3]].values.tolist() - -plt.title("Comparison of 3 Codes") - -xlocs = [i for i in range(len(problem_sizes))] - -plt.xticks(xlocs, problem_sizes) - -# here, we are plotting the raw values read from the input .csv file, which -# we interpret as being "time" that maps directly to the y-axis. -# -# what if we want to plot MFLOPS instead? How do we compute MFLOPS from -# time and problem size? You may need to add some code here to compute -# MFLOPS, then modify the plt.plot() lines below to plot MFLOPS rather than time. - -plt.plot(code1_time, "r-o") -plt.plot(code2_time, "b-x") -plt.plot(code3_time, "g-^") - -#plt.xscale("log") -#plt.yscale("log") - -plt.xlabel("Problem Sizes") -plt.ylabel("runtime") - -varNames = [var_names[1], var_names[2], var_names[3]] -plt.legend(varNames, loc="best") - -plt.grid(axis='both') +# Plot Memory Latency +plt.figure() +plt.plot(problem_sizes, memory_latency, label='Memory Latency') +plt.title('Problem Size vs. Memory Latency') +plt.xlabel('Problem Size') +plt.ylabel('Memory Latency') +plt.legend() +plt.savefig('memory_latency_plot.png') plt.show() -# EOF +# EOF \ No newline at end of file diff --git a/plot_3vars_savefig.py b/plot_3vars_savefig.py index 82616ec..17f0c6a 100644 --- a/plot_3vars_savefig.py +++ b/plot_3vars_savefig.py @@ -21,7 +21,7 @@ import matplotlib.pyplot as plt plot_fname = "myplot.png" -fname = "sample_data_3vars.csv" +fname = "benchmark_data.csv" df = pd.read_csv(fname, comment="#") print(df)