---
jupytext:
  text_representation:
    extension: .md
    format_name: myst
    format_version: 0.13
    jupytext_version: 1.13.6
kernelspec:
  display_name: Python 3
  language: python
  name: python3
---
13+
114Scientific Python Devstats
215==========================
316
@@ -10,3 +23,87 @@ ecosystem.
1023
1124project_reports
1225```
26+
27+ % TODO: automate project generation based on which data files are in devstats-data
28+
29+ ``` {code-cell} ipython3
30+ ---
31+ tags: [remove-cell]
32+ ---
33+ import json
34+ import datetime
35+ import itertools
36+ from dateutil.parser import isoparse
37+ import numpy as np
38+ import matplotlib.pyplot as plt
39+
projects = [
    "numpy", "scipy", "matplotlib", "pandas", "scikit-learn", "scikit-image", "networkx"
]

# Settings shared by every project. They are computed once, before the loop,
# so that all projects are filtered against the same reference date and so
# that `today` / `year` exist for later cells even if `projects` is empty.
default_branches = {"main", "master"}  # Only consider PRs to the main development branch
bot_filter = {"dependabot-preview"}  # Author logins to ignore
today = np.datetime64(datetime.datetime.now(), "D")
year = np.timedelta64(365, "D")

# Maps project name -> {"open_prs": [...], "merged_prs": [...]}, restricted
# to PRs created (open) or merged (merged) within the last year.
project_prs = dict()
for proj in projects:
    with open(f"../devstats-data/{proj}_prs.json", encoding="utf-8") as fh:
        data = [item["node"] for item in json.load(fh)]

    # Keep PRs that target a default branch, have a known author (a failed
    # author query results in None), and were not opened by a filtered bot.
    prs = [
        pr for pr in data
        if pr["baseRefName"] in default_branches
        and pr["author"]
        and pr["author"]["login"] not in bot_filter
    ]

    # Split into merged and open, keeping only PRs that have been merged or
    # created (respectively) in the last year.
    # NOTE(review): the timestamps are handed to np.datetime64 as-is; if they
    # carry a timezone suffix numpy may emit a deprecation warning -- confirm
    # against the data files.
    merged_prs = [
        pr for pr in prs
        if pr["state"] == "MERGED"
        and (today - np.datetime64(pr["mergedAt"], "D")) < year
    ]
    open_prs = [
        pr for pr in prs
        if pr["state"] == "OPEN"
        and (today - np.datetime64(pr["createdAt"], "D")) < year
    ]

    project_prs[proj] = {
        "open_prs": open_prs,
        "merged_prs": merged_prs,
    }
80+ ```
81+
82+ ``` {code-cell} ipython3
83+ ---
84+ tags: [remove-input]
85+ ---
# Number of merged PRs per 30-day "month" over the past year, one line per project.
start_date = today - year
# 13 edges -> 12 bins of 30 days each.
# NOTE: 12 * 30 = 360 days, so the most recent 5 days of the 365-day window
# lie beyond the last edge and are not counted in any bin.
bedges = np.array(
    [start_date + i * np.timedelta64(30, "D") for i in range(13)], dtype=np.datetime64
)
# Proxy date for center of bin
x = bedges[:-1] + np.timedelta64(15, "D")

fig, ax = plt.subplots(figsize=(16, 12))
ax.set_title("Merged PRs", fontsize=24)

# NOTE: np.histogram doesn't work on datetimes
for proj, data in project_prs.items():
    merged_prs = data["merged_prs"]
    merge_dates = np.array([pr["mergedAt"] for pr in merged_prs], dtype="M8[D]")
    # Bins are half-open, [lo, hi): dates have day resolution, so a strict
    # comparison on both sides would drop every PR merged exactly on an edge day.
    num_merged_per_month = [
        int(np.count_nonzero((merge_dates >= lo) & (merge_dates < hi)))
        for lo, hi in itertools.pairwise(bedges)
    ]
    ax.plot(x, num_merged_per_month, label=proj)
ax.legend()
plt.show()
109+ ```
0 commit comments