chore: updates thread handling to avoid race condition in tests

chalmerlowe · parthea · commit 25aba4f4f455 · 2026-03-31T21:45:49.000Z
diff --git a/packages/google-cloud-spanner/google/cloud/spanner_v1/snapshot.py b/packages/google-cloud-spanner/google/cloud/spanner_v1/snapshot.py
@@ -194,19 +194,20 @@ def __init__(self, session, client_context=None):
         self._client_context = _validate_client_context(client_context)
         self._execute_sql_request_count: int = 0
         self._read_request_count: int = 0
+        self._begin_request_sent: bool = False
+
+        # Identifier for the transaction.
         self._transaction_id: Optional[bytes] = None
         self._precommit_token: Optional[MultiplexedSessionPrecommitToken] = None
         self._lock: CrossSync._Sync_Impl.Lock = CrossSync._Sync_Impl.Lock()
 
-    @property
-    def _resource_info(self):
-        """Resource information for metrics labels."""
-        database = self._session._database
-        return {
-            "project": database._instance._client.project,
-            "instance": database._instance.instance_id,
-            "database": database.database_id,
-        }
+        # Operation within a transaction can be performed using multiple
+        # threads, so we need to use a lock when updating the transaction.
+        self._lock: threading.Lock = threading.Lock()
+
+        # Event to coordinate concurrent requests beginning the transaction.
+        # This is used to prevent the "Transaction has not begun" race condition.
+        self._transaction_begin_event = threading.Event()
 
     def begin(self) -> bytes:
         """Begins a transaction on the database.
@@ -234,12 +235,109 @@ def read(
         column_info=None,
         lazy_decode=False,
     ):
-        """Perform a ``StreamingRead`` API request for rows in a table."""
-        if self._read_request_count > 0:
-            if not self._multi_use:
-                raise ValueError("Cannot re-use single-use snapshot.")
-            if self._transaction_id is None:
-                raise ValueError("Transaction has not begun.")
+        """Perform a ``StreamingRead`` API request for rows in a table.
+
+        :type table: str
+        :param table: name of the table from which to fetch data
+
+        :type columns: list of str
+        :param columns: names of columns to be retrieved
+
+        :type keyset: :class:`~google.cloud.spanner_v1.keyset.KeySet`
+        :param keyset: keys / ranges identifying rows to be retrieved
+
+        :type index: str
+        :param index: (Optional) name of index to use, rather than the
+                      table's primary key
+
+        :type limit: int
+        :param limit: (Optional) maximum number of rows to return.
+                      Incompatible with ``partition``.
+
+        :type partition: bytes
+        :param partition: (Optional) one of the partition tokens returned
+                          from :meth:`partition_read`.  Incompatible with
+                          ``limit``.
+
+        :type request_options:
+            :class:`google.cloud.spanner_v1.types.RequestOptions`
+        :param request_options:
+                (Optional) Common options for this request.
+                If a dict is provided, it must be of the same form as the protobuf
+                message :class:`~google.cloud.spanner_v1.types.RequestOptions`.
+                Please note, the `transactionTag` setting will be ignored for
+                snapshot as it's not supported for read-only transactions.
+
+        :type retry: :class:`~google.api_core.retry.Retry`
+        :param retry: (Optional) The retry settings for this request.
+
+        :type timeout: float
+        :param timeout: (Optional) The timeout for this request.
+
+        :type data_boost_enabled:
+        :param data_boost_enabled:
+                (Optional) If this is for a partitioned read and this field is
+                set ``true``, the request will be executed via offline access.
+                If the field is set to ``true`` but the request does not set
+                ``partition_token``, the API will return an
+                ``INVALID_ARGUMENT`` error.
+
+        :type directed_read_options: :class:`~google.cloud.spanner_v1.DirectedReadOptions`
+            or :class:`dict`
+        :param directed_read_options: (Optional) Request level option used to set the directed_read_options
+            for all ReadRequests and ExecuteSqlRequests that indicates which replicas
+            or regions should be used for non-transactional reads or queries.
+
+        :type column_info: dict
+        :param column_info: (Optional) dict of mapping between column names and additional column information.
+            An object where column names as keys and custom objects as corresponding
+            values for deserialization. It's specifically useful for data types like
+            protobuf where deserialization logic is on user-specific code. When provided,
+            the custom object enables deserialization of backend-received column data.
+            If not provided, data remains serialized as bytes for Proto Messages and
+            integer for Proto Enums.
+
+        :type lazy_decode: bool
+        :param lazy_decode:
+            (Optional) If this argument is set to ``true``, the iterator
+            returns the underlying protobuf values instead of decoded Python
+            objects. This reduces the time that is needed to iterate through
+            large result sets. The application is responsible for decoding
+            the data that is needed. The returned row iterator contains two
+            functions that can be used for this. ``iterator.decode_row(row)``
+            decodes all the columns in the given row to an array of Python
+            objects. ``iterator.decode_column(row, column_index)`` decodes one
+            specific column in the given row.
+
+        :rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet`
+        :returns: a result set instance which can be used to consume rows.
+
+        :raises ValueError: if the Transaction already used to execute a
+            read request, but is not a multi-use transaction or has not begun.
+        """
+
+        with self._lock:
+            # Check if this request is beginning the transaction.
+            # If a request is already in progress, other requests must wait
+            # until the transaction ID is available.
+            if self._begin_request_sent or self._read_request_count > 0:
+                if not self._multi_use:
+                    raise ValueError("Cannot re-use single-use snapshot.")
+                if self._transaction_id is None:
+                    wait_needed = True
+                else:
+                    wait_needed = False
+            else:
+                wait_needed = False
+                self._begin_request_sent = True
+
+        if wait_needed:
+            # Wait for the transaction to begin (set by another concurrent request).
+            # This prevents the race condition where concurrent requests think
+            # the transaction hasn't begun.
+            if not self._transaction_begin_event.wait(timeout=30.0):
+                raise ValueError("Timed out waiting for transaction to begin.")
+
         session = self._session
         database = session._database
         api = database.spanner_api
@@ -314,12 +412,128 @@ def execute_sql(
         column_info=None,
         lazy_decode=False,
     ):
-        """Perform an ``ExecuteStreamingSql`` API request."""
-        if self._read_request_count > 0:
-            if not self._multi_use:
-                raise ValueError("Cannot re-use single-use snapshot.")
-            if self._transaction_id is None:
-                raise ValueError("Transaction has not begun.")
+        """Perform an ``ExecuteStreamingSql`` API request.
+
+        :type sql: str
+        :param sql: SQL query statement
+
+        :type params: dict, {str -> column value}
+        :param params: values for parameter replacement.  Keys must match
+                       the names used in ``sql``.
+
+        :type param_types: dict[str -> Union[dict, .types.Type]]
+        :param param_types:
+            (Optional) maps explicit types for one or more param values;
+            required if parameters are passed.
+
+        :type query_mode:
+            :class:`~google.cloud.spanner_v1.types.ExecuteSqlRequest.QueryMode`
+        :param query_mode: Mode governing return of results / query plan.
+            See:
+            `QueryMode <https://cloud.google.com/spanner/reference/rpc/google.spanner.v1#google.spanner.v1.ExecuteSqlRequest.QueryMode>`_.
+
+        :type query_options:
+            :class:`~google.cloud.spanner_v1.types.ExecuteSqlRequest.QueryOptions`
+                or :class:`dict`
+        :param query_options:
+                (Optional) Query optimizer configuration to use for the given query.
+                If a dict is provided, it must be of the same form as the protobuf
+                message :class:`~google.cloud.spanner_v1.types.QueryOptions`
+
+        :type request_options:
+            :class:`google.cloud.spanner_v1.types.RequestOptions`
+        :param request_options:
+                (Optional) Common options for this request.
+                If a dict is provided, it must be of the same form as the protobuf
+                message :class:`~google.cloud.spanner_v1.types.RequestOptions`.
+
+        :type last_statement: bool
+        :param last_statement:
+                If set to true, this option marks the end of the transaction. The
+                transaction should be committed or aborted after this statement
+                executes, and attempts to execute any other requests against this
+                transaction (including reads and queries) will be rejected. Mixing
+                mutations with statements that are marked as the last statement is
+                not allowed.
+                For DML statements, setting this option may cause some error
+                reporting to be deferred until commit time (e.g. validation of
+                unique constraints). Given this, successful execution of a DML
+                statement should not be assumed until the transaction commits.
+
+        :type partition: bytes
+        :param partition: (Optional) one of the partition tokens returned
+                          from :meth:`partition_query`.
+
+        :rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet`
+        :returns: a result set instance which can be used to consume rows.
+
+        :type retry: :class:`~google.api_core.retry.Retry`
+        :param retry: (Optional) The retry settings for this request.
+
+        :type timeout: float
+        :param timeout: (Optional) The timeout for this request.
+
+        :type data_boost_enabled:
+        :param data_boost_enabled:
+                (Optional) If this is for a partitioned query and this field is
+                set ``true``, the request will be executed via offline access.
+                If the field is set to ``true`` but the request does not set
+                ``partition_token``, the API will return an
+                ``INVALID_ARGUMENT`` error.
+
+        :type directed_read_options: :class:`~google.cloud.spanner_v1.DirectedReadOptions`
+            or :class:`dict`
+        :param directed_read_options: (Optional) Request level option used to set the directed_read_options
+            for all ReadRequests and ExecuteSqlRequests that indicates which replicas
+            or regions should be used for non-transactional reads or queries.
+
+        :type column_info: dict
+        :param column_info: (Optional) dict of mapping between column names and additional column information.
+            An object where column names as keys and custom objects as corresponding
+            values for deserialization. It's specifically useful for data types like
+            protobuf where deserialization logic is on user-specific code. When provided,
+            the custom object enables deserialization of backend-received column data.
+            If not provided, data remains serialized as bytes for Proto Messages and
+            integer for Proto Enums.
+
+        :type lazy_decode: bool
+        :param lazy_decode:
+            (Optional) If this argument is set to ``true``, the iterator
+            returns the underlying protobuf values instead of decoded Python
+            objects. This reduces the time that is needed to iterate through
+            large result sets. The application is responsible for decoding
+            the data that is needed. The returned row iterator contains two
+            functions that can be used for this. ``iterator.decode_row(row)``
+            decodes all the columns in the given row to an array of Python
+            objects. ``iterator.decode_column(row, column_index)`` decodes one
+            specific column in the given row.
+
+        :raises ValueError: if the Transaction already used to execute a
+            read request, but is not a multi-use transaction or has not begun.
+        """
+
+        with self._lock:
+            # Check if this request is beginning the transaction.
+            # If a request is already in progress, other requests must wait
+            # until the transaction ID is available.
+            if self._begin_request_sent or self._read_request_count > 0:
+                if not self._multi_use:
+                    raise ValueError("Cannot re-use single-use snapshot.")
+                if self._transaction_id is None:
+                    wait_needed = True
+                else:
+                    wait_needed = False
+            else:
+                wait_needed = False
+                self._begin_request_sent = True
+
+        if wait_needed:
+            # Wait for the transaction to begin (set by another concurrent request).
+            # This prevents the race condition where concurrent requests think
+            # the transaction hasn't begun.
+            if not self._transaction_begin_event.wait(timeout=30.0):
+                raise ValueError("Timed out waiting for transaction to begin.")
+
         if params is not None:
             params_pb = Struct(
                 fields={key: _make_value_pb(value) for key, value in params.items()}
@@ -670,6 +884,9 @@ def _update_for_transaction_pb(self, transaction_pb: Transaction) -> None:
         """Updates the snapshot for the given transaction."""
         if self._transaction_id is None and transaction_pb.id:
             self._transaction_id = transaction_pb.id
+            # Notify waiting threads that the transaction has begun.
+            self._transaction_begin_event.set()
+
         if transaction_pb._pb.HasField("precommit_token"):
             self._update_for_precommit_token_pb_unsafe(transaction_pb.precommit_token)
 
diff --git a/packages/google-cloud-spanner/tests/unit/test_spanner.py b/packages/google-cloud-spanner/tests/unit/test_spanner.py
@@ -1104,6 +1104,7 @@ def test_transaction_for_concurrent_statement_should_begin_one_transaction_with_
     def test_transaction_for_concurrent_statement_should_begin_one_transaction_with_read(
         self,
     ):
+        self.maxDiff = None
         database = _Database()
         api = database.spanner_api = self._make_spanner_api()
         session = _Session(database)