Skip to content

Commit 6a3818a

Browse files
committed
WIP: Add some basic analysis to index.
Working towards an interactive dashboard.
1 parent 3cacb8d commit 6a3818a

1 file changed

Lines changed: 97 additions & 0 deletions

File tree

site/index.md

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
---
2+
jupytext:
3+
text_representation:
4+
extension: .md
5+
format_name: myst
6+
format_version: 0.13
7+
jupytext_version: 1.13.6
8+
kernelspec:
9+
display_name: Python 3
10+
language: python
11+
name: python3
12+
---
13+
114
Scientific Python Devstats
215
==========================
316

@@ -10,3 +23,87 @@ ecosystem.
1023
1124
project_reports
1225
```
26+
27+
% TODO: automate project generation based on which data files are in devstats-data
28+
29+
```{code-cell} ipython3
30+
---
31+
tags: [remove-cell]
32+
---
33+
import json
34+
import datetime
35+
import itertools
36+
from dateutil.parser import isoparse
37+
import numpy as np
38+
import matplotlib.pyplot as plt
39+
40+
projects = [
    "numpy", "scipy", "matplotlib", "pandas", "scikit-learn", "scikit-image", "networkx"
]

# Reference date and look-back window. Computed once, before the loop, so that
# every project is filtered against the same cutoff and both names are defined
# for the plotting cell below even if `projects` is empty.
today = np.datetime64(datetime.datetime.now(), "D")
year = np.timedelta64(365, "D")

# Only consider PRs to the main development branch
default_branches = {"main", "master"}
# Ignore bots
bot_filter = {"dependabot-preview"}

# Maps project name -> {"open_prs": [...], "merged_prs": [...]}
project_prs = dict()
for proj in projects:
    # Each data file holds a JSON list of {"node": {...}} entries, one per PR
    with open(f"../devstats-data/{proj}_prs.json", encoding="utf-8") as fh:
        data = [item["node"] for item in json.load(fh)]

    prs = [
        pr for pr in data
        if pr["baseRefName"] in default_branches
        and pr["author"]  # Failed author query results in None
        and pr["author"]["login"] not in bot_filter
    ]

    # Split into merged and open, keeping only PRs that have been
    # merged (resp. created) in the last year
    merged_prs = [
        pr for pr in prs
        if pr["state"] == "MERGED"
        and (today - np.datetime64(pr["mergedAt"], "D")) < year
    ]
    open_prs = [
        pr for pr in prs
        if pr["state"] == "OPEN"
        and (today - np.datetime64(pr["createdAt"], "D")) < year
    ]

    project_prs[proj] = {
        "open_prs": open_prs,
        "merged_prs": merged_prs,
    }
80+
```
81+
82+
```{code-cell} ipython3
83+
---
84+
tags: [remove-input]
85+
---
86+
# Num merged PRs per month
start_date = today - year
# 13 edges -> 12 consecutive 30-day bins, used as a proxy for months.
# NOTE: 12 * 30 = 360 days, so if `year` is 365 days the most recent 5 days
# fall after the last bin edge and are not plotted.
bedges = np.array(
    [start_date + i * np.timedelta64(30, "D") for i in range(13)], dtype=np.datetime64
)
# Proxy date for center of bin
x = bedges[:-1] + np.timedelta64(15, "D")

fig, ax = plt.subplots(figsize=(16, 12))
ax.set_title("Merged PRs", fontsize=24)

# NOTE: np.histogram doesn't work on datetimes
for proj, data in project_prs.items():
    merged_prs = data["merged_prs"]
    merge_dates = np.array([pr["mergedAt"] for pr in merged_prs], dtype="M8[D]")
    # Bins are half-open, [lo, hi). Both the merge dates and the bin edges have
    # day resolution, so a strict comparison on both sides would silently drop
    # every PR merged on a bin-edge date from all bins.
    num_merged_per_month = [
        int(np.count_nonzero((merge_dates >= lo) & (merge_dates < hi)))
        for lo, hi in itertools.pairwise(bedges)
    ]
    ax.plot(x, num_merged_per_month, label=proj)
ax.legend()
plt.show()
109+
```

0 commit comments

Comments
 (0)