Benchmarking
In this section, we benchmark GaugeFixer’s runtime and memory usage for gauge fixing against the standard approach of directly multiplying the parameter vector by the dense projection matrix.
[1]:
import tracemalloc
from itertools import product
from time import perf_counter, time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import LogLocator, NullFormatter

from gaugefixer import AllOrderModel, PairwiseModel
Model configurations
For simplicity, we will evaluate the performance on all-order and pairwise interaction models defined over protein sequences of different lengths.
[2]:
# Container for the per-run measurements gathered by the benchmark loop.
results = []

# Number of sequence lengths sampled for the pairwise model.
n_points = 12
alphabet_name = "protein"

# `models` and `seq_lengths` are paired by position: each model class is
# benchmarked over the array of sequence lengths at the same index.
models = [AllOrderModel, PairwiseModel]
seq_lengths = [
    # All-order model: L = 2, 3, 4, 5.
    np.arange(2, 6),
    # Pairwise model: geometrically spaced lengths from 3 to 150, truncated
    # to integers.
    np.geomspace(3, 150, num=n_points).astype(int),
]
Evaluate running time and memory usage
[3]:
# Benchmark `get_fixed_params` for every model class and sequence length, with
# and without the dense projection matrix, recording the elapsed time and the
# tracemalloc-traced memory of each repetition.
n_warmup = 1  # leading runs that are executed but not recorded
n_repeats = 10  # recorded runs per configuration
max_dense_features = 10000  # the dense-matrix path is skipped above this size

# Rows are collected in a cell-local list so that this cell can be re-run:
# `results` is rebound to a DataFrame at the end and can no longer be appended
# to like a list.
records = []
for model, Ls in zip(models, seq_lengths):
    for L in Ls:
        m = model(alphabet_name=alphabet_name, L=L)
        m.set_random_params()
        for use_dense_matrix in [False, True]:
            if use_dense_matrix and m.n_features > max_dense_features:
                continue
            for i in range(n_warmup + n_repeats):
                # NOTE: tracing is active during the timed call, so the
                # reported runtimes include tracemalloc's overhead.
                tracemalloc.start()
                try:
                    current1, peak1 = tracemalloc.get_traced_memory()
                    # perf_counter() is monotonic and uses the highest
                    # resolution clock available, unlike time().
                    t0 = perf_counter()
                    # The result stays referenced so that it is still alive
                    # (and counted) when the "current" memory is read below.
                    theta_fixed = m.get_fixed_params(
                        gauge="zero-sum", use_dense_matrix=use_dense_matrix
                    )
                    t1 = perf_counter() - t0
                    current2, peak2 = tracemalloc.get_traced_memory()
                finally:
                    # Always stop tracing, even if the call above raises.
                    tracemalloc.stop()

                # Skip the first run (warm-up)
                if i < n_warmup:
                    continue

                records.append(
                    {
                        "model": model.__name__,
                        "n_features": m.n_features,
                        "alphabet_name": alphabet_name,
                        "time": t1,
                        # Traced sizes are in bytes; report megabytes.
                        "current_memory": (current2 - current1) / 1e6,
                        "peak_memory": (peak2 - peak1) / 1e6,
                        "dense_matrix": use_dense_matrix,
                    }
                )

results = pd.DataFrame(records)
results
[3]:
| | model | n_features | alphabet_name | time | current_memory | peak_memory | dense_matrix |
|---|---|---|---|---|---|---|---|
| 0 | AllOrderModel | 441 | protein | 0.001605 | 0.010924 | 0.048751 | False |
| 1 | AllOrderModel | 441 | protein | 0.001165 | 0.009152 | 0.047243 | False |
| 2 | AllOrderModel | 441 | protein | 0.001037 | 0.009824 | 0.047979 | False |
| 3 | AllOrderModel | 441 | protein | 0.001023 | 0.009280 | 0.047371 | False |
| 4 | AllOrderModel | 441 | protein | 0.001694 | 0.009121 | 0.047276 | False |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 205 | PairwiseModel | 4473001 | protein | 5.179069 | 71.571140 | 295.822343 | False |
| 206 | PairwiseModel | 4473001 | protein | 4.999057 | 71.570496 | 295.821699 | False |
| 207 | PairwiseModel | 4473001 | protein | 5.332721 | 71.571140 | 295.822343 | False |
| 208 | PairwiseModel | 4473001 | protein | 4.825775 | 71.570496 | 295.821699 | False |
| 209 | PairwiseModel | 4473001 | protein | 5.127097 | 71.571140 | 295.822343 | False |
210 rows × 7 columns
Visualize computational performance
[4]:
# Build a human-readable "model (method)" label for every benchmark row and a
# colour palette keyed by those labels.

# One colour per model: both methods of a model share the same colour.
colors = ["C0", "C0", "C1", "C1"]

# Kept under its own name so that the list of model classes bound to `models`
# in the configuration cell is not overwritten; the benchmark cell can then be
# re-run after this one.
model_names = ["AllOrderModel", "PairwiseModel"]
model_labels = ["all-order", "pairwise"]
model_dict = dict(zip(model_names, model_labels))

# Display name for each value of the `dense_matrix` column.
dense = [True, False]
dense_labels = ["standard", "GaugeFixer"]
dense_dict = dict(zip(dense, dense_labels))

# product() varies the method fastest, which matches the order of `colors`:
# (all-order, standard), (all-order, GaugeFixer), (pairwise, standard), ...
labels = [
    f"{model_label} ({dense_label})"
    for model_label, dense_label in product(model_labels, dense_labels)
]
palette = dict(zip(labels, colors))

results["label"] = [
    f"{model_dict[model_name]} ({dense_dict[is_dense]})"
    for model_name, is_dense in zip(results["model"], results["dense_matrix"])
]
results
[4]:
| | model | n_features | alphabet_name | time | current_memory | peak_memory | dense_matrix | label |
|---|---|---|---|---|---|---|---|---|
| 0 | AllOrderModel | 441 | protein | 0.001605 | 0.010924 | 0.048751 | False | all-order (GaugeFixer) |
| 1 | AllOrderModel | 441 | protein | 0.001165 | 0.009152 | 0.047243 | False | all-order (GaugeFixer) |
| 2 | AllOrderModel | 441 | protein | 0.001037 | 0.009824 | 0.047979 | False | all-order (GaugeFixer) |
| 3 | AllOrderModel | 441 | protein | 0.001023 | 0.009280 | 0.047371 | False | all-order (GaugeFixer) |
| 4 | AllOrderModel | 441 | protein | 0.001694 | 0.009121 | 0.047276 | False | all-order (GaugeFixer) |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 205 | PairwiseModel | 4473001 | protein | 5.179069 | 71.571140 | 295.822343 | False | pairwise (GaugeFixer) |
| 206 | PairwiseModel | 4473001 | protein | 4.999057 | 71.570496 | 295.821699 | False | pairwise (GaugeFixer) |
| 207 | PairwiseModel | 4473001 | protein | 5.332721 | 71.571140 | 295.822343 | False | pairwise (GaugeFixer) |
| 208 | PairwiseModel | 4473001 | protein | 4.825775 | 71.570496 | 295.821699 | False | pairwise (GaugeFixer) |
| 209 | PairwiseModel | 4473001 | protein | 5.127097 | 71.571140 | 295.822343 | False | pairwise (GaugeFixer) |
210 rows × 8 columns
[5]:
# Columns of `results` to plot (one panel each), mapped to their y-axis labels.
variables = {
    "time": "runtime (s)",
    "peak_memory": "memory (MB)",
}

# Y-axis (tick positions, limits) for each panel, in the order of `variables`.
y_settings = [
    ((1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1), (3e-6, 3e1)),
    ((1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3), (3e-4, 3e3)),
]

# Shared line width and seaborn options: mean line with standard-deviation
# error bars.
lw = 1
kwargs = {
    "palette": palette,
    "errorbar": "sd",
    "err_style": "bars",
    "err_kws": {"capsize": lw, "elinewidth": lw, "capthick": lw},
    "lw": lw,
}

fig, subplots = plt.subplots(1, 2, figsize=(7, 3.25), sharex=True, sharey=False)

for ax, (column, ylabel), (yticks, ylim) in zip(
    subplots, variables.items(), y_settings
):
    # groupby yields its groups in sorted key order by default, so rows with
    # dense_matrix=False are drawn dashed and rows with dense_matrix=True solid.
    grouped = results.groupby("dense_matrix")
    for (_, subset), linestyle in zip(grouped, ["--", "solid"]):
        sns.lineplot(
            x="n_features",
            y=column,
            hue="label",
            data=subset,
            ax=ax,
            linestyle=linestyle,
            **kwargs,
        )

    ax.set(
        xlabel="num parameters",
        ylabel=ylabel,
        xscale="log",
        yscale="log",
        xlim=(90, 2e7),
        ylim=ylim,
        xticks=(1e2, 1e3, 1e4, 1e5, 1e6, 1e7),
        yticks=yticks,
        ymargin=0.2,
    )

    # Set log-scale minor tickmarks (unlabelled) at 2..9 within each decade.
    minor_locator = LogLocator(base=10.0, subs=np.arange(2, 10) * 0.1, numticks=100)
    ax.xaxis.set_minor_locator(minor_locator)
    ax.xaxis.set_minor_formatter(NullFormatter())

    # Hide the legend attached while plotting, then create a fresh one in the
    # lower-right corner (loc=4).
    ax.legend_.set_visible(False)
    ax.legend(loc=4)

fig.tight_layout(pad=0.2, w_pad=1)