-
Notifications
You must be signed in to change notification settings - Fork 1.7k
feat: Add bigframes.execution_history API to track BigQuery jobs #16588
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
eab6cdb
e5ec3e9
44e1b62
6055998
df8dbcd
30f0a2b
8c9deb8
a46ce69
c09b946
6f11279
60a19ae
d428370
39f4c2a
b669473
2398a67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,6 +109,39 @@ | |
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class _ExecutionHistory(pandas.DataFrame):
    """DataFrame subclass listing the BigQuery jobs run in a BigFrames session.

    Subclassing ``pandas.DataFrame`` (with ``_constructor`` overridden) means
    slicing/filtering operations keep returning ``_ExecutionHistory``, and a
    compact HTML repr is shown in notebooks instead of the default table.
    """

    @property
    def _constructor(self):
        # pandas calls this when producing derived frames (slices, copies),
        # so those results stay _ExecutionHistory rather than plain DataFrame.
        return _ExecutionHistory

    def _repr_html_(self) -> str | None:
        """Render a compact HTML table (query id, status, bytes, job link).

        Falls back to the default DataFrame HTML repr if formatting fails,
        e.g. when an expected column is missing.
        """
        # NOTE(review): the cheap empty check and the import stay OUTSIDE the
        # try block so that real bugs in them are not silently swallowed by
        # the broad fallback below; only the exception-prone formatting and
        # rendering code is wrapped.
        if self.empty:
            return "<div>No executions found.</div>"

        import bigframes.formatting_helpers as formatter

        try:
            cols = ["job_id", "status", "total_bytes_processed", "job_url"]
            df_display = self[cols].copy()
            df_display["total_bytes_processed"] = df_display[
                "total_bytes_processed"
            ].apply(formatter.get_formatted_bytes)

            def format_url(url):
                # Empty/None URLs render as an empty cell rather than a dead link.
                return f'<a target="_blank" href="{url}">Open Job</a>' if url else ""

            df_display["job_url"] = df_display["job_url"].apply(format_url)

            # Rename job_id to query_id to match user expectations
            df_display = df_display.rename(columns={"job_id": "query_id"})

            return df_display.to_html(escape=False, index=False)
        except Exception:
            # Best-effort: a repr must never crash a notebook cell, so fall
            # back to pandas' default HTML rendering on any formatting error.
            return super()._repr_html_()  # type: ignore
|
|
||
|
|
||
| @log_adapter.class_logger | ||
| class Session( | ||
| third_party_pandas_gbq.GBQIOMixin, | ||
|
|
@@ -233,6 +266,7 @@ def __init__( | |
| ) | ||
|
|
||
| self._metrics = metrics.ExecutionMetrics() | ||
| self._publisher.subscribe(self._metrics.on_event) | ||
| self._function_session = bff_session.FunctionSession() | ||
| self._anon_dataset_manager = anonymous_dataset.AnonymousDatasetManager( | ||
| self._clients_provider.bqclient, | ||
|
|
@@ -371,6 +405,10 @@ def slot_millis_sum(self): | |
| """The sum of all slot time used by bigquery jobs in this session.""" | ||
| return self._metrics.slot_millis | ||
|
|
||
def execution_history(self) -> pandas.DataFrame:
    """Return the BigQuery executions initiated by BigFrames in this session.

    One row per job, built from the job records collected by the session's
    execution metrics.
    """
    job_records = (vars(job) for job in self._metrics.jobs)
    return _ExecutionHistory(list(job_records))
|
|
||
| @property | ||
| def _allows_ambiguity(self) -> bool: | ||
| return self._allow_ambiguity | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need execution history to be a dataframe itself? this comes with a lot of baggage
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great point! I've refactored _ExecutionHistory to use composition instead of inheritance. It is now a standard class that wraps a DataFrame internally, avoiding the baggage of subclassing. Users can call .to_dataframe() on it to get the DataFrame representation.