Skip to content

Commit d221721

Browse files
committed
added performance test
1 parent 05a9770 commit d221721

1 file changed

Lines changed: 311 additions & 0 deletions

File tree

doc/articles/first_true_1d.py

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
2+
3+
4+
import os
5+
import sys
6+
import timeit
7+
import typing as tp
8+
from itertools import repeat
9+
10+
from arraykit import first_true_1d as ak_first_true_1d
11+
import arraykit as ak
12+
13+
from arrayredox import first_true_1d as ar_first_true_1d
14+
15+
import matplotlib.pyplot as plt
16+
import numpy as np
17+
import pandas as pd
18+
19+
sys.path.append(os.getcwd())
20+
21+
22+
23+
class ArrayProcessor:
    """Common base for the benchmarked strategies.

    An instance stores the fixture array; subclasses define ``__call__`` to
    run one way of locating the first True value so the call can be timed.
    """
    NAME = ''  # label used for the bar/legend entry when plotting
    SORT = -1  # ordering key used when arranging the bars

    def __init__(self, array: np.ndarray):
        # Keep the fixture so every timed invocation works on the same array.
        self.array = array
29+
30+
#-------------------------------------------------------------------------------
31+
class AKFirstTrue(ArrayProcessor):
    """Benchmark case for ArrayKit's ``first_true_1d`` with ``forward=True``."""
    NAME = 'ak.first_true_1d()'
    SORT = 0

    def __call__(self):
        # The return value is discarded; only the call itself is being timed.
        _ = ak_first_true_1d(self.array, forward=True)
37+
38+
class ARFirstTrue(ArrayProcessor):
    """Benchmark case for arrayredox's ``first_true_1d``."""
    NAME = 'ar.first_true_1d()'
    SORT = 0

    def __call__(self):
        # Called with the array alone: no ``forward`` keyword is passed to
        # this implementation. The return value is discarded.
        _ = ar_first_true_1d(self.array)
45+
46+
47+
class PYLoop(ArrayProcessor):
    """Benchmark case: element-by-element scan in pure Python."""
    NAME = 'Python Loop'
    SORT = 0

    def __call__(self):
        # Walk the array and stop at the first element comparing equal to
        # True; the explicit comparison is part of the work being timed.
        for position, flag in enumerate(self.array):
            if flag == True:
                break
55+
56+
57+
class NPNonZero(ArrayProcessor):
    """Benchmark case: first entry of the index array from ``np.nonzero``."""
    NAME = 'np.nonzero()'
    SORT = 3

    def __call__(self):
        # nonzero() returns one index array per dimension; take the first
        # index along the first (only) dimension. Result is discarded.
        _ = np.nonzero(self.array)[0][0]
63+
64+
class NPArgMax(ArrayProcessor):
    """Benchmark case: ``np.argmax`` applied directly to the array."""
    NAME = 'np.argmax()'
    SORT = 1

    def __call__(self):
        # Result is discarded; only the call is being timed.
        _ = np.argmax(self.array)
70+
71+
class NPNotAnyArgMax(ArrayProcessor):
    """Benchmark case: an ``np.any`` check followed by ``np.argmax``."""
    NAME = 'np.any(), np.argmax()'
    SORT = 2

    def __call__(self):
        # Both steps always run and both results are discarded; the pair of
        # calls together is what gets timed.
        _ = not np.any(self.array)
        _ = np.argmax(self.array)
78+
79+
#-------------------------------------------------------------------------------
80+
# Number of calls per timing run; totals are divided by this for display.
NUMBER = 100


def seconds_to_display(seconds: float) -> str:
    """Format a total run time as a per-call duration with a unit suffix.

    ``seconds`` is the elapsed time for ``NUMBER`` calls. It is divided by
    ``NUMBER`` and rendered with one decimal place in microseconds (below
    1e-4 s), milliseconds (below 1e-1 s) or seconds otherwise. Values that
    satisfy neither comparison, such as NaN, end up in the seconds branch.
    """
    per_call = seconds / NUMBER
    if per_call < 1e-4:
        micro = per_call * 1e6
        return f'{micro: .1f} (µs)'
    elif per_call < 1e-1:
        milli = per_call * 1e3
        return f'{milli: .1f} (ms)'
    else:
        return f'{per_call: .1f} (s)'
89+
90+
91+
def plot_performance(frame):
    """Render the timing results as a grid of bar charts and open the image.

    One subplot is drawn per (array size, fixture) pair: rows follow the
    groups of ``frame['size']`` and columns the groups of
    ``frame['fixture']``. Each subplot holds one bar per processor class,
    ordered by the class's ``SORT`` attribute. The figure is saved to
    ``/tmp/first_true.png`` and then passed to an external viewer (``eog``
    on Linux, ``open`` otherwise).

    Args:
        frame: DataFrame with the columns ``cls_processor``, ``fixture``,
            ``size`` and ``time``. ``cls_processor`` holds classes exposing
            ``NAME`` and ``SORT``; ``fixture`` holds labels of the form
            ``'<density>-<position>'``.
    """
    fixture_total = len(frame['fixture'].unique())
    cat_total = len(frame['size'].unique())
    processor_total = len(frame['cls_processor'].unique())
    # squeeze=False keeps ``axes`` two-dimensional, so the [row][column]
    # lookup below also works with a single size or a single fixture.
    fig, axes = plt.subplots(cat_total, fixture_total, squeeze=False)

    cmap = plt.get_cmap('plasma')
    # One evenly spaced colormap sample per processor.
    color = cmap(np.arange(processor_total) / processor_total)

    # category is the size of the array
    for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')):
        for fixture_count, (fixture_label, fixture) in enumerate(
                cat.groupby('fixture')):
            ax = axes[cat_count][fixture_count]

            # Order the bars by SORT. assign() builds a new frame instead of
            # writing into the groupby slice, and the stable sort keeps
            # processors that share a SORT value in the same relative order
            # in every subplot, so bar colors match the shared legend.
            fixture = fixture.assign(
                sort=[f.SORT for f in fixture['cls_processor']],
            ).sort_values('sort', kind='mergesort')

            results = fixture['time'].values.tolist()
            names_display = [cls.NAME for cls in fixture['cls_processor']]
            post = ax.bar(names_display, results, color=color)

            density, position = fixture_label.split('-')
            # cat_label is the size of the array
            title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[density]}\n{FixtureFactory.POSITION_TO_DISPLAY[position]}'

            ax.set_title(title, fontsize=6)
            # Box aspect is height / width, so 0.75 makes it wider than tall.
            ax.set_box_aspect(0.75)
            time_max = fixture['time'].max()
            # Ticks at zero, half and full height; the zero tick is unlabeled.
            ax.set_yticks([0, time_max * 0.5, time_max])
            ax.set_yticklabels(['',
                    seconds_to_display(time_max * .5),
                    seconds_to_display(time_max),
                    ], fontsize=6)
            # Hide x ticks and labels; the figure legend names the bars.
            ax.tick_params(
                    axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False,
                    )

    fig.set_size_inches(9, 3.5)  # width, height
    # The legend reuses the handles and names of the last subplot drawn.
    fig.legend(post, names_display, loc='center right', fontsize=8)
    # horizontal, vertical
    fig.text(.05, .96, f'first_true_1d() Performance: {NUMBER} Iterations', fontsize=10)
    fig.text(.05, .90, get_versions(), fontsize=6)

    fp = '/tmp/first_true.png'
    plt.subplots_adjust(
            left=0.075,
            bottom=0.05,
            right=0.80,
            top=0.85,
            wspace=1,  # width
            hspace=0.1,
            )
    plt.savefig(fp, dpi=300)

    # Hand the saved image to a platform viewer.
    if sys.platform.startswith('linux'):
        os.system(f'eog {fp}&')
    else:
        os.system(f'open {fp}')
161+
162+
163+
#-------------------------------------------------------------------------------
164+
165+
class FixtureFactory:
    """Base class for producers of 1D boolean benchmark arrays.

    Subclasses set ``NAME`` to a ``'<density>-<position>'`` label and override
    ``get_array`` to place True values in an otherwise all-False array.
    """
    NAME = ''

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        """Return a boolean array of length ``size`` filled with False."""
        return np.full(size, False, dtype=bool)

    @staticmethod
    def _get_array_filled(
            size: int,
            start_third: int,  # 1 or 2
            density: float,  # less than 1
            ) -> np.ndarray:
        """Return an array with evenly spaced True values in its tail.

        True values are written at a fixed stride from the index at
        ``start_third / 3`` of the length up to the end. The stride is chosen
        so that roughly ``size * density`` positions are set.

        Args:
            size: Length of the array.
            start_third: Which third boundary filling starts at.
            density: Fraction of ``size`` used as the target True count.

        Returns:
            The boolean array; all False when the target count is zero.
        """
        a = FixtureFactory.get_array(size)
        count = size * density
        if count <= 0:
            # Nothing to fill (empty array or zero density); also avoids
            # dividing by zero below.
            return a
        start = int(len(a) * (start_third/3))
        length = len(a) - start
        # Clamp to 1: when the target count exceeds the tail length the
        # quotient truncates to 0, which np.arange cannot use as a step.
        step = max(1, int(length / count))
        fill = np.arange(start, len(a), step)
        a[fill] = True
        return a

    @classmethod
    def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]:
        """Return ``(cls.NAME, cls.get_array(size))``."""
        array = cls.get_array(size)
        return cls.NAME, array

    # Display text for the density part (before the '-') of a NAME label.
    DENSITY_TO_DISPLAY = {
        'single': '1 True',
        'tenth': '10% True',
        'third': '33% True',
    }

    # Display text for the position part (after the '-') of a NAME label.
    POSITION_TO_DISPLAY = {
        'first_third': 'Fill 1/3 to End',
        'second_third': 'Fill 2/3 to End',
    }
201+
202+
203+
class FFSingleFirstThird(FixtureFactory):
    """Fixture with exactly one True, placed one third of the way in."""
    NAME = 'single-first_third'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        flags = FixtureFactory.get_array(size)
        # Index at one third of the length, truncated to an integer.
        position = int(len(flags) * (1/3))
        flags[position] = True
        return flags
211+
212+
class FFSingleSecondThird(FixtureFactory):
    """Fixture with exactly one True, placed two thirds of the way in."""
    NAME = 'single-second_third'

    @staticmethod
    def get_array(size: int) -> np.ndarray:
        flags = FixtureFactory.get_array(size)
        # Index at two thirds of the length, truncated to an integer.
        position = int(len(flags) * (2/3))
        flags[position] = True
        return flags
220+
221+
222+
class FFTenthPostFirstThird(FixtureFactory):
    """Fixture filled from the one-third mark with a density of 0.1."""
    NAME = 'tenth-first_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        # Delegate to the shared strided-fill helper.
        filled = cls._get_array_filled(size, density=.1, start_third=1)
        return filled
228+
229+
230+
class FFTenthPostSecondThird(FixtureFactory):
    """Fixture filled from the two-thirds mark with a density of 0.1."""
    NAME = 'tenth-second_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        # Delegate to the shared strided-fill helper.
        filled = cls._get_array_filled(size, density=.1, start_third=2)
        return filled
236+
237+
238+
class FFThirdPostFirstThird(FixtureFactory):
    """Fixture filled from the one-third mark with a density of 1/3."""
    NAME = 'third-first_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        # Delegate to the shared strided-fill helper.
        filled = cls._get_array_filled(size, density=1/3, start_third=1)
        return filled
244+
245+
246+
class FFThirdPostSecondThird(FixtureFactory):
    """Fixture filled from the two-thirds mark with a density of 1/3."""
    NAME = 'third-second_third'

    @classmethod
    def get_array(cls, size: int) -> np.ndarray:
        # Delegate to the shared strided-fill helper.
        filled = cls._get_array_filled(size, density=1/3, start_third=2)
        return filled
252+
253+
254+
def get_versions() -> str:
    """Return a one-line summary of the OS, ArrayKit and NumPy versions.

    The returned string ends with a newline.
    """
    import platform

    system = platform.system()
    ak_version = ak.__version__
    np_version = np.__version__
    return f'OS: {system} / ArrayKit: {ak_version} / NumPy: {np_version}\n'
257+
258+
259+
# Processor classes included in the benchmark run. The commented entries are
# defined in this file but currently left out.
CLS_PROCESSOR = (
    AKFirstTrue,
    ARFirstTrue,
    NPNonZero,
    NPArgMax,
    # NPNotAnyArgMax,
    # PYLoop,
)

# Fixture factories; each one supplies a labelled array for every size.
CLS_FF = (
    FFSingleFirstThird,
    FFSingleSecondThird,
    FFTenthPostFirstThird,
    FFTenthPostSecondThird,
    FFThirdPostFirstThird,
    FFThirdPostSecondThird,
)
276+
277+
278+
def run_test():
    """Time every processor against every fixture and plot the results.

    For each array size and each fixture factory in ``CLS_FF``, a fixture is
    built once and shared by all processors in ``CLS_PROCESSOR``. Each
    processor is timed for ``NUMBER`` calls with ``timeit``. The records are
    collected into a DataFrame, printed, and passed to ``plot_performance``.
    """
    records = []
    for size in (100_000, 1_000_000, 10_000_000):
        for ff in CLS_FF:
            fixture_label, fixture = ff.get_label_array(size)
            for cls in CLS_PROCESSOR:
                runner = cls(fixture)

                record = [cls, NUMBER, fixture_label, size]
                # Printed before timing so progress is visible during a run.
                print(record)
                try:
                    # Expose only ``runner`` to the timed statement.
                    result = timeit.timeit(
                            'runner()',
                            globals={'runner': runner},
                            number=NUMBER)
                except OSError:
                    # Record a failed measurement as NaN and carry on.
                    result = np.nan
                record.append(result)
                records.append(record)

    f = pd.DataFrame.from_records(records,
            columns=('cls_processor', 'number', 'fixture', 'size', 'time')
            )
    print(f)
    plot_performance(f)
305+
306+
# Run the full benchmark only when executed as a script, not on import.
if __name__ == '__main__':
    run_test()
309+
310+
311+

0 commit comments

Comments
 (0)