diff --git a/cqlsh-scripts/analyze_clients.py b/cqlsh-scripts/analyze_clients.py new file mode 100644 index 0000000..4c66415 --- /dev/null +++ b/cqlsh-scripts/analyze_clients.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +# Verified against backend 5.5 +# to be executed inside the wire-utility pod +# After dumping brig.user, galley.clients, and gundeck.user_push, we use galley.clients as the source of all known clients per user, and brig.user only to map user IDs to emails. +# gundeck.user_push.client is matched against galley.clients.clients: matching push clients are classified by ARN as iOS (APNS) or Android (GCM/FCM). +# Any client present in galley.clients.clients but missing from gundeck.user_push.client is counted as a Webapp/Desktop client. +# We count total unique users, total unique clients, per-client-type counts, per-user client-type usage, and list Webapp/Desktop clients with their user ID and email. +# Any push client present in gundeck.user_push but missing from galley.clients is reported separately as an inconsistency. 
import csv
import re
import sys
from collections import defaultdict
from pathlib import Path

# Substrings that identify the push platform inside an SNS endpoint ARN
# ("...:endpoint/<PLATFORM>/<app>/<id>").  The VoIP variants are listed
# explicitly: a plain ":endpoint/APNS/" substring test does not match
# ":endpoint/APNS_VOIP/", so those clients would otherwise be left unclassified.
IOS_ARN_MARKERS = (
    ":endpoint/APNS/",
    ":endpoint/APNS_SANDBOX/",
    ":endpoint/APNS_VOIP/",
    ":endpoint/APNS_VOIP_SANDBOX/",
)
ANDROID_ARN_MARKERS = (":endpoint/GCM/", ":endpoint/FCM/")

# Client IDs are pulled out of the raw `clients` column text as runs of hex
# digits, so the surrounding braces/quotes/commas never need to be parsed.
CLIENT_ID_RE = re.compile(r"[0-9a-fA-F]+")


def classify_arn(arn):
    """Return "ios", "android" or None depending on the platform in *arn*."""
    if any(marker in arn for marker in IOS_ARN_MARKERS):
        return "ios"
    if any(marker in arn for marker in ANDROID_ARN_MARKERS):
        return "android"
    return None


def read_rows(path):
    """Yield each row of the CSV file at *path* as a dict keyed by header.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the environment the script runs in.
    """
    with path.open(newline="", encoding="utf-8") as f:
        yield from csv.DictReader(f)


def index_push_rows(rows):
    """Index gundeck.user_push rows (dicts with "client", "arn", "usr").

    Returns a 5-tuple:
    (push_by_client, ios_clients, android_clients, ios_users, android_users)
    where push_by_client maps client ID -> {"user": ..., "arn": ...}.  When a
    client appears in several rows, the last row wins.
    """
    push_by_client = {}
    ios_clients, android_clients = set(), set()
    ios_users, android_users = set(), set()

    for row in rows:
        client = row["client"].strip()
        arn = row["arn"].strip()
        user = row["usr"].strip()

        push_by_client[client] = {"user": user, "arn": arn}

        platform = classify_arn(arn)
        if platform == "ios":
            ios_clients.add(client)
            ios_users.add(user)
        elif platform == "android":
            android_clients.add(client)
            android_users.add(user)

    return push_by_client, ios_clients, android_clients, ios_users, android_users


def index_galley_rows(rows):
    """Index galley.clients rows (dicts with "user" and "clients").

    Returns (clients_by_user, all_clients, users).  Rows whose `clients`
    column contains no client ID contribute nothing, so every user in the
    result has at least one client.
    """
    clients_by_user = defaultdict(set)
    all_clients = set()
    users = set()

    for row in rows:
        clients = CLIENT_ID_RE.findall(row["clients"])
        if not clients:
            continue
        user = row["user"].strip()
        users.add(user)
        clients_by_user[user].update(clients)
        all_clients.update(clients)

    return clients_by_user, all_clients, users


def index_email_rows(rows):
    """Map user ID -> email from brig.user rows (dicts with "id", "email").

    Users with an empty email cell are left out of the mapping, so a later
    `emails.get(user, 'UNKNOWN')` reports them as UNKNOWN instead of printing
    an empty string.
    """
    emails = {}
    for row in rows:
        email = row["email"].strip()
        if email:
            emails[row["id"].strip()] = email
    return emails


def group_missing_by_user(missing_clients, push_by_client):
    """Group push clients unknown to galley by the user owning the push row."""
    grouped = defaultdict(list)
    for client in missing_clients:
        grouped[push_by_client[client]["user"]].append(client)
    return grouped


def group_webapp_by_user(clients_by_user, push_clients):
    """Group, per user, the sorted clients that have no push entry.

    Users whose clients all have a push entry get no key at all, so the
    length of the result is the number of users with such a client.
    """
    grouped = defaultdict(list)
    for user, clients in clients_by_user.items():
        without_push = sorted(clients - push_clients)
        if without_push:
            grouped[user].extend(without_push)
    return grouped


# The loading below touches the filesystem, so it only runs when the file is
# executed as a script (the only way it is used).  This keeps the helpers
# above importable without any export present, while a direct run still
# defines every module-level name the report code further down relies on.
if __name__ == "__main__":
    data_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./cql_export")

    push_csv = data_dir / "user_push.csv"
    galley_csv = data_dir / "galley_clients.csv"
    brig_csv = data_dir / "brig_user.csv"

    (
        push_by_client,
        ios_clients,
        android_clients,
        ios_users,
        android_users,
    ) = index_push_rows(read_rows(push_csv))

    galley_clients_by_user, galley_all_clients, galley_users = index_galley_rows(
        read_rows(galley_csv)
    )

    emails = index_email_rows(read_rows(brig_csv))

    push_clients = set(push_by_client)

    push_clients_missing_in_galley = sorted(push_clients - galley_all_clients)
    webapp_clients = sorted(galley_all_clients - push_clients)

    missing_in_galley_by_user = group_missing_by_user(
        push_clients_missing_in_galley, push_by_client
    )
    webapp_clients_by_user = group_webapp_by_user(
        galley_clients_by_user, push_clients
    )

+all_known_clients = galley_all_clients | push_clients
+all_known_users = galley_users | {v["user"] for v in push_by_client.values()}
+# Overall totals: union of what galley.clients and gundeck.user_push know about.
+print("=== Total known clients and users ===")
+print(f"Unique users: {len(all_known_users)}")
+print(f"Total clients: {len(all_known_clients)}")
+print()
+# Per-type client counts; galley clients without a push entry count as Webapp/Desktop.
+print("=== Client type summary ===")
+print(f"iOS clients: {len(ios_clients)}")
+print(f"Android clients: {len(android_clients)}")
+print(f"Webapp/Desktop clients: {len(webapp_clients)}")
+print(f"Unknown push clients not in galley.clients: {len(push_clients_missing_in_galley)}")
+print()
+# Per-type user counts; a user is counted once per type, however many clients they have.
+print("=== Client Type usage across the users ===")
+print(f"Users using iOS: {len(ios_users)}")
+print(f"Users using Android: {len(android_users)}")
+print(f"Users using Webapp/Desktop: {len(webapp_clients_by_user)}")
+print(f"Total users using Mobile clients(iOS/Android): {len(ios_users | android_users)}")
+print()
+# Inconsistency list; `or` also maps an empty email cell to UNKNOWN, not just a missing user.
+print("=== Push clients (iOS/Android) if not present in galley.clients (all clients) ===")
+print(f"Count: {len(push_clients_missing_in_galley)}")
+for user, clients in sorted(missing_in_galley_by_user.items()):
+    print(f"user={user} email={emails.get(user) or 'UNKNOWN'} clients={clients}")
+print()
+# Webapp/Desktop clients per user, with the same UNKNOWN fallback for the email.
+print("=== Webapp/Desktop clients: galley.clients not present in gundeck.user_push ===")
+print(f"Count: {len(webapp_clients)}")
+for user, clients in sorted(webapp_clients_by_user.items()):
+    print(f"user={user} email={emails.get(user) or 'UNKNOWN'} clients={clients}")
+
diff --git a/cqlsh-scripts/collect_user_cql_data.sh b/cqlsh-scripts/collect_user_cql_data.sh
new file mode 100644
index 0000000..c846f13
--- /dev/null
+++ b/cqlsh-scripts/collect_user_cql_data.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# Export the three tables read by analyze_clients.py as CSV files with a header row.
+# To be executed inside the wire-utility pod.
+set -euo pipefail
+
+# Both settings can be overridden from the environment.
+CQLSH="${CQLSH:-cqlsh}"
+OUT_DIR="${OUT_DIR:-./cql_export}"
+
+mkdir -p "$OUT_DIR"
+
+# NOTE(review): $CQLSH is left unquoted, presumably so it may carry extra arguments (host, credentials) - confirm.
+# NOTE(review): PAGESIZE = 100 and NUMPROCESSES = 1 look chosen to keep the export gentle on the cluster - confirm.
+$CQLSH -e "COPY gundeck.user_push (client, arn, usr) TO '$OUT_DIR/user_push.csv' WITH HEADER = true AND PAGESIZE = 100 AND NUMPROCESSES = 1;"
+$CQLSH -e "COPY galley.clients (user, clients) TO '$OUT_DIR/galley_clients.csv' WITH HEADER = true AND PAGESIZE = 100 AND NUMPROCESSES = 1;"
+$CQLSH -e "COPY brig.user (id, email) TO '$OUT_DIR/brig_user.csv' WITH HEADER = true AND PAGESIZE = 100 AND NUMPROCESSES = 1;"
+
+echo "CSV files written to: $OUT_DIR"