From 75a421c0179adeffb2001bf414cc5a566d11de35 Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Mon, 2 Mar 2026 17:33:53 +0300 Subject: [PATCH 1/8] feat: otel tracing and metric support --- .../tech/ydb/core/grpc/GrpcTransport.java | 6 + .../ydb/core/grpc/GrpcTransportBuilder.java | 12 + .../tech/ydb/core/impl/YdbTransportImpl.java | 8 + .../java/tech/ydb/core/metrics/Meter.java | 9 + .../java/tech/ydb/core/metrics/NoopMeter.java | 30 ++ .../ydb/core/metrics/SessionPoolObserver.java | 7 + .../ydb/core/tracing/OpenTelemetryTracer.java | 19 +- .../main/java/tech/ydb/core/tracing/Span.java | 55 +-- .../tech/ydb/core/tracing/SpanFinalizer.java | 46 ++ opentelemetry/pom.xml | 60 +++ .../ydb/opentelemetry/OpenTelemetryMeter.java | 99 ++++ .../opentelemetry/OpenTelemetryTracer.java | 93 ++++ .../OpenTelemetryMetricsIntegrationTest.java | 192 ++++++++ .../tech/ydb/query/impl/QueryServiceRpc.java | 7 + .../java/tech/ydb/query/impl/SessionImpl.java | 127 +++-- .../java/tech/ydb/query/impl/SessionPool.java | 51 ++- .../tech/ydb/query/impl/TableClientImpl.java | 17 +- .../tech/ydb/query/impl/QueryTracingTest.java | 433 +----------------- .../ydb/table/impl/SimpleTableClient.java | 6 - 19 files changed, 758 insertions(+), 519 deletions(-) create mode 100644 core/src/main/java/tech/ydb/core/metrics/Meter.java create mode 100644 core/src/main/java/tech/ydb/core/metrics/NoopMeter.java create mode 100644 core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java create mode 100644 core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java create mode 100644 opentelemetry/pom.xml create mode 100644 opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java create mode 100644 opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java create mode 100644 opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java diff --git a/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java b/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java index 96816601f..328786874 100644 --- a/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java +++ b/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java @@ -12,6 +12,8 @@ import io.grpc.MethodDescriptor; import tech.ydb.core.Result; +import tech.ydb.core.metrics.Meter; +import tech.ydb.core.metrics.NoopMeter; import tech.ydb.core.tracing.NoopTracer; import tech.ydb.core.tracing.Tracer; import tech.ydb.core.utils.URITools; @@ -46,6 +48,10 @@ default Tracer getTracer() { return NoopTracer.getInstance(); } + default Meter getMeter() { + return NoopMeter.INSTANCE; + } + @Override void close(); diff --git a/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java b/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java index 534cf08be..a77cab841 100644 --- a/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java +++ b/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java @@ -26,6 +26,8 @@ import tech.ydb.core.impl.auth.GrpcAuthRpc; import tech.ydb.core.impl.pool.ChannelFactoryLoader; import tech.ydb.core.impl.pool.ManagedChannelFactory; +import tech.ydb.core.metrics.Meter; +import tech.ydb.core.metrics.NoopMeter; import tech.ydb.core.tracing.NoopTracer; import tech.ydb.core.tracing.Tracer; import tech.ydb.core.utils.Version; @@ -87,6 +89,7 @@ public enum InitMode { private GrpcCompression compression = GrpcCompression.NO_COMPRESSION; private InitMode initMode = InitMode.SYNC; private Tracer tracer = NoopTracer.getInstance(); + private Meter meter = NoopMeter.getInstance(); /** * can cause leaks https://github.com/grpc/grpc-java/issues/9340 @@ -195,6 +198,10 @@ public Tracer getTracer() { return tracer; } + public Meter getMeter() { + return meter; + } + public ManagedChannelFactory getManagedChannelFactory() { if (channelFactoryBuilder == null) { channelFactoryBuilder = ChannelFactoryLoader.load(); @@ -423,6 +430,11 @@ public GrpcTransportBuilder withTracer(Tracer tracer) { return this; } + public GrpcTransportBuilder withMeter(Meter meter) { + this.meter = Objects.requireNonNull(meter, "meter is null"); + return this; + } + /** * use {@link GrpcTransportBuilder#withGrpcRetry(boolean) } instead * @return this diff --git a/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java b/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java index 52d4ba018..768634801 100644 --- a/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java +++ b/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java @@ -21,6 +21,7 @@ import tech.ydb.core.impl.pool.GrpcChannel; import tech.ydb.core.impl.pool.GrpcChannelPool; import tech.ydb.core.impl.pool.ManagedChannelFactory; +import tech.ydb.core.metrics.Meter; import tech.ydb.core.tracing.Tracer; /** @@ -37,6 +38,7 @@ public class YdbTransportImpl extends BaseGrpcTransport { private final GrpcChannelPool channelPool; private final YdbDiscovery discovery; private final Tracer tracer; + private final Meter meter; public YdbTransportImpl(GrpcTransportBuilder builder) { super(builder); @@ -45,6 +47,7 @@ public YdbTransportImpl(GrpcTransportBuilder builder) { this.database = Strings.nullToEmpty(builder.getDatabase()); this.tracer = builder.getTracer(); + this.meter = builder.getMeter(); logger.info("Create YDB transport with endpoint {} and {}", serverEndpoint, balancingSettings); @@ -124,6 +127,11 @@ public Tracer getTracer() { return tracer; } + @Override + public Meter getMeter() { + return meter; + } + @Override public AuthCallOptions getAuthCallOptions() { return callOptions; diff --git a/core/src/main/java/tech/ydb/core/metrics/Meter.java b/core/src/main/java/tech/ydb/core/metrics/Meter.java new file mode 100644 index 000000000..c94913257 --- /dev/null +++ b/core/src/main/java/tech/ydb/core/metrics/Meter.java @@ -0,0 +1,9 @@ +package tech.ydb.core.metrics; + +import tech.ydb.core.Status; + +public interface Meter { + void recordOperation(String name, long durationNanos, Status status); + void registerSessionPool(String poolName, SessionPoolObserver observer); + void recordSessionCreateTime(String poolName, long durationNanos); +} diff --git a/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java b/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java new file mode 100644 index 000000000..cd126efd5 --- /dev/null +++ b/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java @@ -0,0 +1,30 @@ +package tech.ydb.core.metrics; + +import tech.ydb.core.Status; + +public final class NoopMeter implements Meter { + public static final NoopMeter INSTANCE = new NoopMeter(); + + private NoopMeter() { + // No operations. + } + + public static NoopMeter getInstance() { + return INSTANCE; + } + + @Override + public void recordOperation(String name, long durationNanos, Status status) { + + } + + @Override + public void registerSessionPool(String poolName, SessionPoolObserver observer) { + + } + + @Override + public void recordSessionCreateTime(String poolName, long durationNanos) { + + } +} diff --git a/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java b/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java new file mode 100644 index 000000000..4b872c6f5 --- /dev/null +++ b/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java @@ -0,0 +1,7 @@ +package tech.ydb.core.metrics; + +public interface SessionPoolObserver { + int getIdleCount(); + int getUsedCount(); + int getPendingCount(); +} diff --git a/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java b/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java index 4fc7025a8..e8f13ba7e 100644 --- a/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java +++ b/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java @@ -94,16 +94,6 @@ public void setAttribute(String key, long value) { @Override public void setStatus(@Nullable Status status, @Nullable Throwable error) { - if (status != null) { - if (status.isSuccess()) { - span.setStatus(StatusCode.OK); - } else { - tech.ydb.core.StatusCode code = status.getCode(); - span.setAttribute("db.response.status_code", code.toString()); - span.setAttribute("error.type", code.isTransportError() ? "transport_error" : "ydb_error"); - span.setStatus(StatusCode.ERROR, status.toString()); - } - } if (error != null) { if (error instanceof UnexpectedResultException) { tech.ydb.core.StatusCode code = ((UnexpectedResultException) error).getStatus().getCode(); @@ -113,7 +103,16 @@ public void setStatus(@Nullable Status status, @Nullable Throwable error) { span.setAttribute("error.type", error.getClass().getName()); } span.setStatus(StatusCode.ERROR, error.getMessage()); + return; + } + if (status != null && !status.isSuccess()) { + tech.ydb.core.StatusCode code = status.getCode(); + span.setAttribute("db.response.status_code", code.toString()); + span.setAttribute("error.type", code.isTransportError() ? "transport_error" : "ydb_error"); + span.setStatus(StatusCode.ERROR, status.toString()); + return; } + span.setStatus(StatusCode.OK); } @Override diff --git a/core/src/main/java/tech/ydb/core/tracing/Span.java b/core/src/main/java/tech/ydb/core/tracing/Span.java index b482bc6cb..f4c63b60d 100644 --- a/core/src/main/java/tech/ydb/core/tracing/Span.java +++ b/core/src/main/java/tech/ydb/core/tracing/Span.java @@ -16,20 +16,9 @@ @ExperimentalApi("YDB Tracer is experimental and API may change without notice") public interface Span { Span NOOP = new Span() { + // No operations. }; - /** - * Returns W3C traceparent value for request propagation. - * - *

For {@link #NOOP} this returns an empty string. Check {@link #isValid()} to decide whether - * trace headers should be sent to server. - * - * @return traceparent value - */ - default String getId() { - return ""; - } - /** * Indicates whether this span carries a real tracing context. * @@ -39,23 +28,6 @@ default boolean isValid() { return false; } - /** - * Sets a string attribute on the span. - * - * @param key attribute key - * @param value attribute value, may be null - */ - default void setAttribute(String key, @Nullable String value) { - } - - /** - * Sets a long attribute on the span. - * - * @param key attribute key - * @param value attribute value - */ - default void setAttribute(String key, long value) { - } /** * Sets span status (success or error) with human-readable message. @@ -66,6 +38,10 @@ default void setAttribute(String key, long value) { default void setStatus(@Nullable Status status, @Nullable Throwable error) { } + default String getId() { + return ""; + } + /** * Makes this span current in the active execution context. * @@ -76,6 +52,10 @@ default Scope makeCurrent() { }; } + /** Sets a string attribute on the span (ignored by Noop implementation). */ + default void setAttribute(String key, String value) { + } + /** * Restores context captured when this span was created. * @@ -88,10 +68,11 @@ default Scope restoreContext() { }; } - /** - * Ends (finishes) this span. - */ - default void end() { + /** Sets a long attribute on the span (ignored by Noop implementation). */ + default void setAttribute(String key, long value) { + } + + default void setError(Status status) { } /** @@ -101,6 +82,7 @@ default void end() { * @param span the span to finalize * @param future the future to observe * @return the same future (for chaining) + * Sets span status to error from exception. */ static CompletableFuture endOnStatus(Span span, CompletableFuture future) { return span.isValid() ? future.whenComplete((status, th) -> { @@ -109,6 +91,9 @@ static CompletableFuture endOnStatus(Span span, CompletableFuture CompletableFuture> endOnResult(Span span, CompletableFuture span.end(); }) : future; } + + default void end() { + } } + diff --git a/core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java b/core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java new file mode 100644 index 000000000..25db26cd2 --- /dev/null +++ b/core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java @@ -0,0 +1,46 @@ +package tech.ydb.core.tracing; + +import java.util.function.BiConsumer; + +import tech.ydb.core.Status; +import tech.ydb.core.utils.FutureTools; + +/** + * Shared helpers to finish spans for status/exception outcomes. + */ +public final class SpanFinalizer { + private SpanFinalizer() { + } + + public static void finishByStatus(Span span, Status status) { + if (span == null) { + return; + } + + span.setStatus(status, null); + span.end(); + } + + public static void finishByError(Span span, Throwable error) { + if (span == null) { + return; + } + + span.setStatus(null, error != null ? FutureTools.unwrapCompletionException(error) : null); + span.end(); + } + + public static BiConsumer whenComplete(Span span) { + return (status, error) -> { + if (span == null) { + return; + } + + if (error != null) { + finishByError(span, error); + return; + } + finishByStatus(span, status); + }; + } +} diff --git a/opentelemetry/pom.xml b/opentelemetry/pom.xml new file mode 100644 index 000000000..9f5647393 --- /dev/null +++ b/opentelemetry/pom.xml @@ -0,0 +1,60 @@ + + + 4.0.0 + + + tech.ydb + ydb-sdk-parent + 2.4.1-SNAPSHOT + + + ydb-sdk-opentelemetry + OpenTelemetry integration for YDB Java SDK + OpenTelemetry integration module for tracing and metrics in YDB SDK + + + 1.59.0 + + + + + tech.ydb + ydb-sdk-core + + + io.opentelemetry + opentelemetry-api + ${opentelemetry.version} + + + io.opentelemetry + opentelemetry-sdk + 1.59.0 + test + + + junit + junit + test + + + tech.ydb + ydb-sdk-query + test + + + tech.ydb.test + ydb-junit4-support + test + + + io.opentelemetry + opentelemetry-sdk-testing + 1.59.0 + test + + + diff --git a/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java b/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java new file mode 100644 index 000000000..63867688b --- /dev/null +++ b/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java @@ -0,0 +1,99 @@ +package tech.ydb.opentelemetry; + +import java.util.Objects; + +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; + +import tech.ydb.core.Status; +import tech.ydb.core.metrics.Meter; +import tech.ydb.core.metrics.SessionPoolObserver; + +public final class OpenTelemetryMeter implements Meter { + private final io.opentelemetry.api.metrics.Meter meter; + + private final DoubleHistogram operationDuration; // db.client.operation.duration + private final LongCounter operationFailed; // ydb.client.operation.failed + private final DoubleHistogram sessionCreateTime; // ydb.query.session.create_time + + private final Attributes baseAttributes; // db.system.name, db.namespace, server.address, server.port + + private OpenTelemetryMeter(io.opentelemetry.api.metrics.Meter meter, + String database, String host, int port) { + this.meter = Objects.requireNonNull(meter, "meter is null"); + this.operationDuration = meter.histogramBuilder("db.client.operation.duration") + .setUnit("s") + .build(); + this.operationFailed = meter.counterBuilder("ydb.client.operation.failed") + .setUnit("{command}") + .build(); + this.sessionCreateTime = meter.histogramBuilder("ydb.query.session.create_time") + .setUnit("s") + .build(); + + this.baseAttributes = Attributes.of( + AttributeKey.stringKey("db.system.name"), "ydb", + AttributeKey.stringKey("db.namespace"), database, + AttributeKey.stringKey("server.address"), host, + AttributeKey.longKey("server.port"), (long) port + ); + } + + public static OpenTelemetryMeter fromOpenTelemetry(OpenTelemetry openTelemetry, + String database, String host, int port) { + io.opentelemetry.api.metrics.Meter meter = openTelemetry + .getMeter("tech.ydb.sdk"); + + return new OpenTelemetryMeter(meter, database, host, port); + } + + public static OpenTelemetryMeter createGlobal(String database, String host, int port) { + return fromOpenTelemetry(GlobalOpenTelemetry.get(), database, host, port); + } + + @Override + public void recordOperation(String name, long durationNanos, Status status) { + Attributes attrs = baseAttributes.toBuilder() + .put("ydb.operation.name", name) + .build(); + + operationDuration.record(durationNanos / 1_000_000_000.0, attrs); + + if (status != null && !status.isSuccess()) { + Attributes errAttrs = attrs.toBuilder() + .put("db.response.status_code", status.getCode().toString()) + .build(); + operationFailed.add(1, errAttrs); + } + } + + @Override + public void registerSessionPool(String poolName, SessionPoolObserver observer) { + meter.upDownCounterBuilder("ydb.query.session.count") + .setUnit("{connection}") + .buildWithCallback(measurement -> { + Attributes idle = Attributes.of( + AttributeKey.stringKey("ydb.query.session.pool.name"), poolName, + AttributeKey.stringKey("ydb.query.session.state"), "idle" + ); + Attributes used = Attributes.of( + AttributeKey.stringKey("ydb.query.session.pool.name"), poolName, + AttributeKey.stringKey("ydb.query.session.state"), "used" + ); + measurement.record(observer.getIdleCount(), idle); + measurement.record(observer.getUsedCount(), used); + }); + } + + @Override + public void recordSessionCreateTime(String poolName, long durationNanos) { + Attributes attrs = Attributes.of( + AttributeKey.stringKey("ydb.query.session.pool.name"), poolName + ); + sessionCreateTime.record(durationNanos / 1_000_000_000.0, attrs); + } +} diff --git a/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java b/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java new file mode 100644 index 000000000..27665b3df --- /dev/null +++ b/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java @@ -0,0 +1,93 @@ +package tech.ydb.opentelemetry; + +import java.util.Objects; + +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.trace.StatusCode; + +import tech.ydb.core.Status; +import tech.ydb.core.tracing.Span; +import tech.ydb.core.tracing.SpanKind; +import tech.ydb.core.tracing.Tracer; + +public final class OpenTelemetryTracer implements Tracer { + private static final String DEFAULT_SCOPE = "tech.ydb.sdk"; + + private final io.opentelemetry.api.trace.Tracer tracer; + + private OpenTelemetryTracer(io.opentelemetry.api.trace.Tracer tracer) { + this.tracer = Objects.requireNonNull(tracer, "tracer is null"); + } + + public static OpenTelemetryTracer createGlobal() { + return fromOpenTelemetry(GlobalOpenTelemetry.get()); + } + + public static OpenTelemetryTracer fromOpenTelemetry(OpenTelemetry openTelemetry) { + return fromOpenTelemetry(openTelemetry, DEFAULT_SCOPE); + } + + public static OpenTelemetryTracer fromOpenTelemetry(OpenTelemetry openTelemetry, String scopeName) { + Objects.requireNonNull(openTelemetry, "openTelemetry is null"); + Objects.requireNonNull(scopeName, "scopeName is null"); + return new OpenTelemetryTracer(openTelemetry.getTracer(scopeName)); + } + + @Override + public Span startSpan(String spanName, SpanKind spanKind) { + io.opentelemetry.api.trace.Span span = tracer.spanBuilder(spanName) + .setSpanKind(mapSpanKind(spanKind)) + .startSpan(); + return new OtelSpan(span); + } + + private static io.opentelemetry.api.trace.SpanKind mapSpanKind(SpanKind kind) { + if (kind == SpanKind.CLIENT) { + return io.opentelemetry.api.trace.SpanKind.CLIENT; + } + return io.opentelemetry.api.trace.SpanKind.INTERNAL; + } + + private static final class OtelSpan implements Span { + private final io.opentelemetry.api.trace.Span span; + + private OtelSpan(io.opentelemetry.api.trace.Span span) { + this.span = span; + } + + @Override + public String getId() { + return "00-" + span.getSpanContext().getTraceId() + "-" + span.getSpanContext().getSpanId() + "01"; + } + + @Override + public void setAttribute(String key, String value) { + span.setAttribute(key, value); + } + + @Override + public void setAttribute(String key, long value) { + span.setAttribute(key, value); + } + + @Override + public void setError(Status status) { + span.setAttribute("db.response.status_code", status.getCode().toString()); + span.setAttribute("error.type", status.getCode().isTransportError() ? "transport_error" : "ydb_error"); + + span.setStatus(StatusCode.ERROR, status.toString()); + } + + @Override + public void setError(Throwable error) { + span.setAttribute("error.type", error.getClass().getName()); + span.setStatus(StatusCode.ERROR, error.getMessage()); + } + + @Override + public void end() { + span.end(); + } + } +} diff --git a/opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java b/opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java new file mode 100644 index 000000000..dc4d5c736 --- /dev/null +++ b/opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java @@ -0,0 +1,192 @@ +package tech.ydb.opentelemetry; + +import java.io.IOException; +import java.time.Duration; +import java.util.Collection; + +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.metrics.SdkMeterProvider; +import io.opentelemetry.sdk.metrics.data.HistogramPointData; +import io.opentelemetry.sdk.metrics.data.LongPointData; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +import tech.ydb.auth.TokenAuthProvider; +import tech.ydb.common.transaction.TxMode; +import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.query.QueryClient; +import tech.ydb.query.QuerySession; +import tech.ydb.query.QueryTransaction; +import tech.ydb.test.junit4.YdbHelperRule; + +public class OpenTelemetryMetricsIntegrationTest { + @ClassRule + public static final YdbHelperRule YDB = new YdbHelperRule(); + + private static final AttributeKey DB_SYSTEM_NAME = AttributeKey.stringKey("db.system.name"); + private static final AttributeKey DB_NAMESPACE = AttributeKey.stringKey("db.namespace"); + private static final AttributeKey SERVER_ADDRESS = AttributeKey.stringKey("server.address"); + private static final AttributeKey SERVER_PORT = AttributeKey.longKey("server.port"); + private static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("ydb.operation.name"); + + private static InMemoryMetricReader metricReader; + private static SdkMeterProvider meterProvider; + private static GrpcTransport transport; + + private QueryClient queryClient; + + @BeforeClass + public static void initTransport() { + metricReader = InMemoryMetricReader.create(); + meterProvider = SdkMeterProvider.builder() + .registerMetricReader(metricReader) + .build(); + + OpenTelemetry openTelemetry = OpenTelemetrySdk.builder() + .setMeterProvider(meterProvider) + .build(); + + String host = extractHost(YDB.endpoint()); + int port = extractPort(YDB.endpoint()); + + transport = GrpcTransport.forEndpoint(YDB.endpoint(), YDB.database()) + .withAuthProvider(new TokenAuthProvider(YDB.authToken())) + .withMeter(OpenTelemetryMeter.fromOpenTelemetry(openTelemetry, YDB.database(), host, port)) + .build(); + } + + @AfterClass + public static void closeTransport() throws IOException { + transport.close(); + meterProvider.close(); + metricReader.close(); + } + + @Before + public void initClient() { + queryClient = QueryClient.newClient(transport).build(); + } + + @After + public void closeClient() { + queryClient.close(); + } + + @Test + public void executeQueryRecordsOperationDuration() { + try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); + } + + MetricData metric = findMetric("db.client.operation.duration"); + Assert.assertNotNull("db.client.operation.duration metric not found", metric); + + HistogramPointData point = findHistogramPoint(metric, "ydb.ExecuteQuery"); + Assert.assertNotNull("No histogram point for ydb.ExecuteQuery", point); + Assert.assertTrue("Duration must be > 0", point.getSum() > 0); + Assert.assertEquals("ydb", point.getAttributes().get(DB_SYSTEM_NAME)); + Assert.assertEquals(YDB.database(), point.getAttributes().get(DB_NAMESPACE)); + Assert.assertNotNull(point.getAttributes().get(SERVER_ADDRESS)); + Assert.assertNotNull(point.getAttributes().get(SERVER_PORT)); + } + + @Test + public void commitAndRollbackRecordOperationDuration() { + try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + QueryTransaction txCommit = session.beginTransaction(TxMode.SERIALIZABLE_RW) + .join().getValue(); + txCommit.createQuery("SELECT 1").execute().join().getStatus().expectSuccess(); + txCommit.commit().join().getStatus().expectSuccess(); + + QueryTransaction txRollback = session.beginTransaction(TxMode.SERIALIZABLE_RW) + .join().getValue(); + txRollback.createQuery("SELECT 1").execute().join().getStatus().expectSuccess(); + txRollback.rollback().join().expectSuccess(); + } + + MetricData metric = findMetric("db.client.operation.duration"); + Assert.assertNotNull(metric); + + Assert.assertNotNull("No histogram point for ydb.Commit", + findHistogramPoint(metric, "ydb.Commit")); + Assert.assertNotNull("No histogram point for ydb.Rollback", + findHistogramPoint(metric, "ydb.Rollback")); + } + + @Test + public void failedOperationRecordsFailedCounter() { + try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + session.createQuery("SELECT * FROM __nonexistent_table__", TxMode.NONE) + .execute().join(); + } + + MetricData metric = findMetric("ydb.client.operation.failed"); + Assert.assertNotNull("ydb.client.operation.failed metric not found", metric); + + Collection points = metric.getLongSumData().getPoints(); + Assert.assertFalse("Failed counter must have at least one point", points.isEmpty()); + long total = points.stream().mapToLong(LongPointData::getValue).sum(); + Assert.assertTrue("Failed counter must be > 0", total > 0); + } + + @Test + public void sessionPoolMetricsAreReported() { + // создаём сессию чтобы пул оживился + try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); + } + + MetricData metric = findMetric("ydb.query.session.count"); + Assert.assertNotNull("ydb.query.session.count metric not found", metric); + } + + // --- helpers --- + + private MetricData findMetric(String name) { + Collection metrics = metricReader.collectAllMetrics(); + for (MetricData m : metrics) { + if (name.equals(m.getName())) { + return m; + } + } + return null; + } + + private HistogramPointData findHistogramPoint(MetricData metric, String operationName) { + for (HistogramPointData point : metric.getHistogramData().getPoints()) { + String op = point.getAttributes().get(OPERATION_NAME); + if (operationName.equals(op)) { + return point; + } + } + return null; + } + + private static String extractHost(String endpoint) { + // endpoint вида grpc://host:port или host:port + String stripped = endpoint.replaceFirst("grpcs?://", ""); + int colon = stripped.lastIndexOf(':'); + return colon >= 0 ? stripped.substring(0, colon) : stripped; + } + + private static int extractPort(String endpoint) { + String stripped = endpoint.replaceFirst("grpcs?://", ""); + int colon = stripped.lastIndexOf(':'); + if (colon >= 0) { + try { + return Integer.parseInt(stripped.substring(colon + 1)); + } catch (NumberFormatException ignored) { + } + } + return 2135; + } +} diff --git a/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java b/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java index 66c2e0a6c..6038c447a 100644 --- a/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java +++ b/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java @@ -6,6 +6,7 @@ import tech.ydb.core.grpc.GrpcReadStream; import tech.ydb.core.grpc.GrpcRequestSettings; import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.metrics.Meter; import tech.ydb.core.operation.StatusExtractor; import tech.ydb.core.tracing.Span; import tech.ydb.core.tracing.SpanKind; @@ -51,16 +52,22 @@ class QueryServiceRpc { private final GrpcTransport transport; private final Tracer trace; + private final Meter meter; QueryServiceRpc(GrpcTransport transport) { this.transport = transport; this.trace = transport.getTracer(); + this.meter = transport.getMeter(); } Span startSpan(String spanName) { return trace.startSpan(spanName, SpanKind.CLIENT); } + Meter getMeter() { + return meter; + } + public CompletableFuture> createSession( YdbQuery.CreateSessionRequest request, GrpcRequestSettings settings) { return transport diff --git a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java index f4c5a4065..4ba4308c4 100644 --- a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java @@ -27,6 +27,7 @@ import tech.ydb.core.operation.StatusExtractor; import tech.ydb.core.settings.BaseRequestSettings; import tech.ydb.core.tracing.Span; +import tech.ydb.core.tracing.SpanFinalizer; import tech.ydb.core.utils.URITools; import tech.ydb.core.utils.UpdatableOptional; import tech.ydb.proto.ValueProtos; @@ -144,15 +145,14 @@ GrpcReadStream attach(AttachSessionSettings settings) { YdbQuery.AttachSessionRequest request = YdbQuery.AttachSessionRequest.newBuilder() .setSessionId(sessionId) .build(); - // Execute attachSession call outside current context to avoid cancellation and deadline propagation + // Execute attachSession call outside current context to avoid cancellation and deadline propogation Context ctx = Context.ROOT.fork(); Context previous = ctx.attach(); try { - AtomicBoolean pessimizationHook = new AtomicBoolean(false); - + AtomicBoolean pessimize = new AtomicBoolean(false); GrpcRequestSettings grpcSettings = makeOptions(settings) - .withPessimizationHook(pessimizationHook::get) .disableDeadline() + .withPessimizationHook(pessimize::get) .build(); GrpcReadStream origin = rpc.attachSession(request, grpcSettings); return new GrpcReadStream() { @@ -163,22 +163,17 @@ public CompletableFuture start(GrpcReadStream.Observer observer) String msg = TextFormat.shortDebugString(message); logger.trace("session '{}' got attach stream message {}", sessionId, msg); } + if (message.hasNodeShutdown() || message.hasSessionShutdown()) { + if (message.hasNodeShutdown() && nodeID != 0) { + pessimize.set(true); + } + updateSessionState(Status.of(StatusCode.BAD_SESSION)); + observer.onNext(Status.of(StatusCode.BAD_SESSION)); + return; + } StatusCode code = StatusCode.fromProto(message.getStatus()); Status status = Status.of(code, Issue.fromPb(message.getIssuesList())); updateSessionState(status); - // The hint is sent by the server with a success status. - switch (message.getSessionHintCase()) { - case NODE_SHUTDOWN: - pessimizationHook.set(nodeID != 0); - updateSessionState(Status.of(StatusCode.BAD_SESSION)); - break; - case SESSION_SHUTDOWN: - updateSessionState(Status.of(StatusCode.BAD_SESSION)); - break; - default: - break; - } - observer.onNext(status); }); } @@ -330,8 +325,10 @@ GrpcReadStream createGrpcStream( @Override public QueryStream createQuery(String query, TxMode tx, Params prms, ExecuteQuerySettings settings) { YdbQuery.TransactionControl tc = TxControl.txModeCtrl(tx, true); - Span span = startSpan("ydb.ExecuteQuery"); - return new StreamImpl(createGrpcStream(query, tc, prms, settings, span), span) { + Span span = rpc.startSpan("ydb.ExecuteQuery"); + long startNanos = System.nanoTime(); + + return new StreamImpl(createGrpcStream(query, tc, prms, settings, span), span, startNanos, "ydb.ExecuteQuery") { @Override void handleTxMeta(String txID) { if (txID != null && !txID.isEmpty()) { @@ -371,17 +368,29 @@ static CompletableFuture> createSession( return rpc.createSession(request, grpcSettingsBuilder.build()).thenApply(result -> { pessimizationHook.set(result.getStatus().getCode() == StatusCode.OVERLOADED); + if (!result.isSuccess()) { + SpanFinalizer.finishByStatus(createSpan, result.getStatus()); + } return CREATE_SESSION.apply(result); }); } abstract class StreamImpl implements QueryStream { private final GrpcReadStream grpcStream; - private final Span span; - - StreamImpl(GrpcReadStream grpcStream, Span operationSpan) { + @Nullable + private final Span operationSpan; + private final long startNanos; + private final String operationName; + + StreamImpl( + GrpcReadStream grpcStream, + @Nullable + Span operationSpan, long startNanos, String operationName + ) { this.grpcStream = grpcStream; - this.span = operationSpan; + this.operationSpan = operationSpan; + this.startNanos = startNanos; + this.operationName = operationName; } abstract void handleTxMeta(String txId); @@ -393,10 +402,10 @@ void handleCompletion(Status status, Throwable th) { public CompletableFuture> execute(PartsHandler handler) { final UpdatableOptional operationStatus = new UpdatableOptional<>(); final UpdatableOptional stats = new UpdatableOptional<>(); - return Span.endOnResult(span, grpcStream.start(msg -> { + return grpcStream.start(msg -> { if (isTraceEnabled) { - logger.trace("{} got stream message {}", - SessionImpl.this, TextFormat.shortDebugString(msg)); + logger.trace("{} got stream message {}", SessionImpl.this, + TextFormat.shortDebugString(msg)); } Issue[] issues = Issue.fromPb(msg.getIssuesList()); Status status = Status.of(StatusCode.fromProto(msg.getStatus()), issues); @@ -431,16 +440,22 @@ public CompletableFuture> execute(PartsHandler handler) { logger.trace("{} lost result set part with index {}", SessionImpl.this, index); } } - }).whenComplete(this::handleCompletion).thenApply(streamStatus -> { + }) + .whenComplete(this::handleCompletion) + .thenApply(streamStatus -> { updateSessionState(streamStatus); Status status = operationStatus.orElse(streamStatus); + long elapsed = System.nanoTime() - startNanos; + rpc.getMeter().recordOperation(operationName, elapsed, status); + Result result; if (status.isSuccess()) { - return Result.success(new QueryInfo(stats.get()), streamStatus); + result = Result.success(new QueryInfo(stats.get()), streamStatus); } else { - return Result.fail(status); + result = Result.fail(status); } - }) - ); + SpanFinalizer.finishByStatus(operationSpan, status); + return result; + }); } @Override @@ -477,8 +492,11 @@ public QueryStream createQuery(String query, boolean commitAtEnd, Params prms, E ? TxControl.txIdCtrl(currentId, commitAtEnd) : TxControl.txModeCtrl(txMode, commitAtEnd); - Span span = startSpan("ydb.ExecuteQuery"); - return new StreamImpl(createGrpcStream(query, tc, prms, settings, span), span) { + Span span = rpc.startSpan("ydb.ExecuteQuery"); + long startNanos = System.nanoTime(); + + return new StreamImpl( + createGrpcStream(query, tc, prms, settings, span), span, startNanos, "ydb.ExecuteQuery") { @Override void handleTxMeta(String txID) { String newId = txID == null || txID.isEmpty() ? null : txID; @@ -501,7 +519,7 @@ void handleCompletion(Status status, Throwable th) { currentStatusFuture.complete(Status .of(StatusCode.ABORTED) .withIssues(Issue.of("Query on transaction failed with status " - + status, Issue.Severity.ERROR))); + + status, Issue.Severity.ERROR))); } } @@ -517,13 +535,16 @@ public void cancel() { @Override public CompletableFuture> commit(CommitTransactionSettings settings) { - Span span = startSpan("ydb.Commit"); + final Span commitSpan = rpc.startSpan("ydb.Commit"); + final long startNanos = System.nanoTime(); + CompletableFuture currentStatusFuture = statusFuture.getAndSet(new CompletableFuture<>()); - String transactionId = txId.get(); + final String transactionId = txId.get(); if (transactionId == null) { Issue issue = Issue.of("Transaction is not started", Issue.Severity.WARNING); Result res = Result.success(new QueryInfo(null), Status.of(StatusCode.SUCCESS, issue)); - return Span.endOnResult(span, CompletableFuture.completedFuture(res)); + SpanFinalizer.finishByStatus(commitSpan, res.getStatus()); + return CompletableFuture.completedFuture(res); } YdbQuery.CommitTransactionRequest request = YdbQuery.CommitTransactionRequest.newBuilder() @@ -531,7 +552,7 @@ public CompletableFuture> commit(CommitTransactionSettings set .setTxId(transactionId) .build(); - return Span.endOnResult(span, rpc.commitTransaction(request, makeOptions(settings, span).build())) + return rpc.commitTransaction(request, makeOptions(settings, commitSpan).build()) .thenApply(res -> { Status status = res.getStatus(); currentStatusFuture.complete(status); @@ -545,27 +566,38 @@ public CompletableFuture> commit(CommitTransactionSettings set if (th != null) { currentStatusFuture.completeExceptionally( new RuntimeException("Transaction commit failed with exception", th)); + SpanFinalizer.finishByError(commitSpan, th); + return; } - })); + SpanFinalizer.finishByStatus(commitSpan, status.getStatus()); + })).whenComplete((status, th) -> { // добавить + long elapsed = System.nanoTime() - startNanos; + Status finalStatus = status != null + ? status.getStatus() : Status.of(StatusCode.CLIENT_INTERNAL_ERROR); + rpc.getMeter().recordOperation("ydb.Commit", elapsed, finalStatus); + }); } @Override public CompletableFuture rollback(RollbackTransactionSettings settings) { - Span span = startSpan("ydb.Rollback"); + final Span rollbackSpan = rpc.startSpan("ydb.Rollback"); + final long startNanos = System.nanoTime(); + CompletableFuture currentStatusFuture = statusFuture.getAndSet(new CompletableFuture<>()); - String transactionId = txId.get(); + final String transactionId = txId.get(); if (transactionId == null) { Issue issue = Issue.of("Transaction is not started", Issue.Severity.WARNING); Status status = Status.of(StatusCode.SUCCESS, issue); - return Span.endOnStatus(span, CompletableFuture.completedFuture(status)); + SpanFinalizer.finishByStatus(rollbackSpan, status); + return CompletableFuture.completedFuture(status); } YdbQuery.RollbackTransactionRequest request = YdbQuery.RollbackTransactionRequest.newBuilder() .setSessionId(sessionId) .setTxId(transactionId) .build(); - return Span.endOnResult(span, rpc.rollbackTransaction(request, makeOptions(settings, span).build())) + return rpc.rollbackTransaction(request, makeOptions(settings, rollbackSpan).build()) .thenApply(result -> { updateSessionState(result.getStatus()); if (!txId.compareAndSet(transactionId, null)) { @@ -578,6 +610,15 @@ public CompletableFuture rollback(RollbackTransactionSettings settings) currentStatusFuture.complete(Status .of(StatusCode.ABORTED) .withIssues(Issue.of("Transaction was rolled back", Issue.Severity.ERROR))); + if (th != null) { + SpanFinalizer.finishByError(rollbackSpan, th); + return; + } + SpanFinalizer.finishByStatus(rollbackSpan, status); + }).whenComplete((status, th) -> { + long elapsed = System.nanoTime() - startNanos; + Status finalStatus = status != null ? status : Status.of(StatusCode.CLIENT_INTERNAL_ERROR); + rpc.getMeter().recordOperation("ydb.Rollback", elapsed, finalStatus); }); } } diff --git a/query/src/main/java/tech/ydb/query/impl/SessionPool.java b/query/src/main/java/tech/ydb/query/impl/SessionPool.java index ccbbf4889..107cdc002 100644 --- a/query/src/main/java/tech/ydb/query/impl/SessionPool.java +++ b/query/src/main/java/tech/ydb/query/impl/SessionPool.java @@ -22,7 +22,9 @@ import tech.ydb.core.StatusCode; import tech.ydb.core.UnexpectedResultException; import tech.ydb.core.grpc.GrpcReadStream; +import tech.ydb.core.metrics.SessionPoolObserver; import tech.ydb.core.tracing.Span; +import tech.ydb.core.tracing.SpanFinalizer; import tech.ydb.core.utils.FutureTools; import tech.ydb.proto.query.YdbQuery; import tech.ydb.query.QuerySession; @@ -78,6 +80,23 @@ class SessionPool implements AutoCloseable { minSize, maxSize, cleaner.periodMillis); + + rpc.getMeter().registerSessionPool("default", new SessionPoolObserver() { + @Override + public int getIdleCount() { + return queue.getIdleCount(); + } + + @Override + public int getUsedCount() { + return queue.getUsedCount(); + } + + @Override + public int getPendingCount() { + return queue.getPendingCount(); + } + }); } public void updateMaxSize(int maxSize) { @@ -277,16 +296,44 @@ public CompletableFuture create() { Context previous = ctx.attach(); try { Span createSpan = rpc.startSpan("ydb.CreateSession"); + long startNanos = System.nanoTime(); + stats.requested.increment(); - return Span.endOnResult(createSpan, SessionImpl.createSession(rpc, CREATE_SETTINGS, true, createSpan)) + return SessionImpl + .createSession(rpc, CREATE_SETTINGS, true, createSpan) .thenCompose(r -> { if (!r.isSuccess()) { + SpanFinalizer.finishByStatus(createSpan, r.getStatus()); stats.failed.increment(); throw new UnexpectedResultException("create session problem", r.getStatus()); } PooledQuerySession session = new PooledQuerySession(rpc, r.getValue()); return session.start(); - }).thenApply(Result::getValue); + }) + .whenComplete((result, th) -> { + if (th != null) { + Throwable error = FutureTools.unwrapCompletionException(th); + if (error instanceof UnexpectedResultException) { + SpanFinalizer.finishByStatus( + createSpan, + ((UnexpectedResultException) error).getStatus() + ); + } else { + SpanFinalizer.finishByError(createSpan, error); + } + return; + } + + SpanFinalizer.finishByStatus(createSpan, result.getStatus()); + }) + .whenComplete((status, th) -> { + long elapsed = System.nanoTime() - startNanos; + Status finalStatus = status != null + ? status.getStatus() : Status.of(StatusCode.CLIENT_INTERNAL_ERROR); + rpc.getMeter().recordOperation("ydb.CreateSession", elapsed, finalStatus); + rpc.getMeter().recordSessionCreateTime("default", elapsed); + }) + .thenApply(Result::getValue); } finally { ctx.detach(previous); } diff --git a/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java b/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java index 8fde13bc8..a81fb2fca 100644 --- a/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java @@ -16,6 +16,7 @@ import tech.ydb.core.UnexpectedResultException; import tech.ydb.core.grpc.GrpcTransport; import tech.ydb.core.tracing.Span; +import tech.ydb.core.tracing.SpanFinalizer; import tech.ydb.core.tracing.Tracer; import tech.ydb.proto.ValueProtos; import tech.ydb.proto.query.YdbQuery; @@ -92,9 +93,6 @@ private YdbQuery.TransactionControl mapTxControl(YdbTable.TransactionControl tc) if (tc.getBeginTx().hasSnapshotReadOnly()) { return TxControl.txModeCtrl(TxMode.SNAPSHOT_RO, tc.getCommitTx()); } - if (tc.getBeginTx().hasSnapshotReadWrite()) { - return TxControl.txModeCtrl(TxMode.SNAPSHOT_RW, tc.getCommitTx()); - } if (tc.getBeginTx().hasStaleReadOnly()) { return TxControl.txModeCtrl(TxMode.STALE_RO, tc.getCommitTx()); } @@ -132,9 +130,10 @@ public CompletableFuture> executeDataQueryInternal( final List issues = new ArrayList<>(); final List results = new ArrayList<>(); Span span = querySession.startSpan("ydb.ExecuteQuery"); + final long startNanos = System.nanoTime(); QueryStream stream = querySession.new StreamImpl(querySession.createGrpcStream(query, tc, prms, qs, span), - span) { + span, startNanos, "ydb.ExecuteQuery") { @Override void handleTxMeta(String txID) { txRef.set(txID); @@ -205,23 +204,23 @@ public CompletableFuture> beginTransaction(TxMode txMod } @Override - protected CompletableFuture commitTransactionInternal(String txId, CommitTxSettings settings) { + public CompletableFuture commitTransaction(String txId, CommitTxSettings settings) { Span span = querySession.startSpan("ydb.Commit"); CommitTransactionSettings querySettings = CommitTransactionSettings.newBuilder() .withTraceId(settings.getTraceId()) .withRequestTimeout(settings.getTimeoutDuration()) .build(); - return Span.endOnStatus(span, querySession.commitById(txId, querySettings, span)); + return querySession.commitById(txId, querySettings, span).whenComplete(SpanFinalizer.whenComplete(span)); } @Override - protected CompletableFuture rollbackTransactionInternal(String txId, RollbackTxSettings settings) { + public CompletableFuture rollbackTransaction(String txId, RollbackTxSettings settings) { Span span = querySession.startSpan("ydb.Rollback"); RollbackTransactionSettings querySettings = RollbackTransactionSettings.newBuilder() .withTraceId(settings.getTraceId()) .withRequestTimeout(settings.getTimeoutDuration()) .build(); - return Span.endOnStatus(span, querySession.rollbackById(txId, querySettings, span)); + return querySession.rollbackById(txId, querySettings, span).whenComplete(SpanFinalizer.whenComplete(span)); } private final class TracedTableTransaction implements TableTransaction { @@ -258,7 +257,7 @@ public CompletableFuture rollback(RollbackTxSettings settings) { if (txId == null) { return delegate.rollback(settings); } - return rollbackTransactionInternal(txId, settings); + return TableSession.this.rollbackTransaction(txId, settings); } @Override diff --git a/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java b/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java index 303e69229..34f5f1893 100644 --- a/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java +++ b/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java @@ -4,16 +4,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; - -import javax.annotation.Nullable; import org.junit.After; import org.junit.AfterClass; @@ -27,10 +17,7 @@ import tech.ydb.common.transaction.TxMode; import tech.ydb.core.Result; import tech.ydb.core.Status; -import tech.ydb.core.StatusCode; -import tech.ydb.core.UnexpectedResultException; import tech.ydb.core.grpc.GrpcTransport; -import tech.ydb.core.tracing.Scope; import tech.ydb.core.tracing.Span; import tech.ydb.core.tracing.SpanKind; import tech.ydb.core.tracing.Tracer; @@ -38,8 +25,6 @@ import tech.ydb.query.QuerySession; import tech.ydb.query.QueryTransaction; import tech.ydb.query.result.QueryInfo; -import tech.ydb.query.tools.SessionRetryContext; -import tech.ydb.proto.StatusCodesProtos; import tech.ydb.table.Session; import tech.ydb.table.TableClient; import tech.ydb.table.query.DataQueryResult; @@ -54,7 +39,6 @@ public class QueryTracingTest { private static GrpcTransport transport; private static RecordingTracer tracer; - private static final GrpcTestInterceptor grpcInterceptor = new GrpcTestInterceptor(); private QueryClient queryClient; private TableClient tableClient; @@ -64,7 +48,6 @@ public static void initTransport() { tracer = new RecordingTracer(); transport = GrpcTransport.forEndpoint(YDB.endpoint(), YDB.database()) .withAuthProvider(new TokenAuthProvider(YDB.authToken())) - .addChannelInitializer(grpcInterceptor) .withTracer(tracer) .build(); } @@ -77,7 +60,6 @@ public static void closeTransport() { @Before public void initClient() { tracer.reset(); - grpcInterceptor.reset(); queryClient = QueryClient.newClient(transport).build(); tableClient = null; } @@ -92,22 +74,13 @@ public void closeClient() { } } - private static void awaitTracing() { - try { - tracer.awaitAllSpansClosed(Duration.ofSeconds(30)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - Assert.fail(e.toString()); - } - } - @Test public void createSessionSpanIsRecorded() { - try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { - Assert.assertNotNull(session.getId()); - awaitTracing(); - Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); + try (QuerySession ignored = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + // no-op } + + Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); } @Test @@ -116,7 +89,6 @@ public void executeQuerySpanIsRecorded() { session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); } - awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.ExecuteQuery")); } @@ -131,7 +103,6 @@ public void commitSpanIsRecordedInQueryTransaction() { commitResult.getStatus().expectSuccess(); } - awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Commit")); } @@ -146,7 +117,6 @@ public void rollbackSpanIsRecordedInQueryTransaction() { rollbackStatus.expectSuccess(); } - awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Rollback")); } @@ -162,7 +132,6 @@ public void createSessionAndExecuteQuerySpansAreRecordedInTableProxy() { result.getStatus().expectSuccess(); } - awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); Assert.assertEquals(1, tracer.countClosedSpan("ydb.ExecuteQuery")); } @@ -184,406 +153,47 @@ public void executeQuerySpansAreRecordedInTableProxyTransaction() { rollbackStatus.expectSuccess(); } - awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); Assert.assertEquals(2, tracer.countClosedSpan("ydb.ExecuteQuery")); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Commit")); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Rollback")); } - @Test - public void querySpanIsChildOfApplicationSpan() { - try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { - Span appParent = tracer.startSpan("app.parent", SpanKind.INTERNAL); - try (Scope ignored = appParent.makeCurrent()) { - session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); - } finally { - appParent.end(); - } - } - - awaitTracing(); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "app.parent")); - } - - @Test - public void retrySpanIsParentOfRpcSpans() { - grpcInterceptor.failExecuteQuery(StatusCodesProtos.StatusIds.StatusCode.BAD_SESSION, 2); - SessionRetryContext retryContext = SessionRetryContext.create(queryClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - Span appParent = tracer.startSpan("app.parent.retry", SpanKind.INTERNAL); - try (Scope ignored = appParent.makeCurrent()) { - Status status = retryContext.supplyStatus(session -> - session.createQuery("SELECT 1", TxMode.NONE).execute().thenApply(Result::getStatus)).join(); - status.expectSuccess(); - } finally { - appParent.end(); - } - - awaitTracing(); - Assert.assertEquals(3, tracer.countClosedSpan("ydb.Try")); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.retry")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); - // First Try has no backoff_ms (no prior sleep); subsequent tries capture the prior sleep duration - Assert.assertEquals(2, tracer.countClosedSpanWithAttribute("ydb.Try", "ydb.retry.backoff_ms")); - } - - @Test - public void retryContextRetriesOnCreateSessionFailures() { - grpcInterceptor.failCreateSession(StatusCodesProtos.StatusIds.StatusCode.ABORTED, 2); - SessionRetryContext retryContext = SessionRetryContext.create(queryClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - Span appParent = tracer.startSpan("app.parent.createSession.retry", SpanKind.INTERNAL); - try (Scope ignored = appParent.makeCurrent()) { - Status status = retryContext.supplyStatus(session -> - session.createQuery("SELECT 1", TxMode.NONE).execute().thenApply(Result::getStatus)).join(); - status.expectSuccess(); - } finally { - appParent.end(); - } - - awaitTracing(); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.createSession.retry")); - Assert.assertEquals(3, tracer.countClosedSpan("ydb.Try")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); - Assert.assertEquals(2, tracer.countClosedSpanWithAttribute("ydb.Try", "ydb.retry.backoff_ms")); - } - - @Test - public void retryContextRetriesOnCommitFailures() { - grpcInterceptor.failCommit(StatusCodesProtos.StatusIds.StatusCode.BAD_SESSION, 2); - SessionRetryContext retryContext = SessionRetryContext.create(queryClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - Span appParent = tracer.startSpan("app.parent.commit.retry", SpanKind.INTERNAL); - try (Scope ignored = appParent.makeCurrent()) { - Status status = retryContext.supplyStatus(session -> { - QueryTransaction tx = session.createNewTransaction(TxMode.SERIALIZABLE_RW); - return tx.createQuery("SELECT 1").execute() - .thenCompose(queryResult -> { - queryResult.getStatus().expectSuccess(); - return tx.commit().thenApply(Result::getStatus); - }); - } - ).join(); - status.expectSuccess(); - } finally { - appParent.end(); - } - - awaitTracing(); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.commit.retry")); - Assert.assertEquals(3, tracer.countClosedSpan("ydb.Try")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); - Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Commit", "ydb.Try")); - Assert.assertEquals(2, tracer.countClosedSpanWithAttribute("ydb.Try", "ydb.retry.backoff_ms")); - } - - @Test - public void tableProxyRetrySpanIsParentOfRpcSpans() { - AtomicInteger attempt = new AtomicInteger(); - tableClient = QueryClient.newTableClient(transport).build(); - tech.ydb.table.SessionRetryContext retryContext = tech.ydb.table.SessionRetryContext.create(tableClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - Span appParent = tracer.startSpan("app.parent.table.retry", SpanKind.INTERNAL); - try (Scope ignored = appParent.makeCurrent()) { - Status status = retryContext.supplyStatus(session -> { - if (attempt.getAndIncrement() == 0) { - return CompletableFuture.completedFuture(Status.of(StatusCode.OVERLOADED)); - } - TableTransaction tx = session.createNewTransaction(TxMode.SERIALIZABLE_RW); - Result queryResult = tx.executeDataQuery("SELECT 1", Params.empty()).join(); - queryResult.getStatus().expectSuccess(); - return tx.commit(); - }).join(); - status.expectSuccess(); - } finally { - appParent.end(); - } - - awaitTracing(); - Assert.assertEquals(2, tracer.countClosedSpan("ydb.Try")); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.table.retry")); - Assert.assertEquals(2, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.Commit", "ydb.Try")); - } - - @Test - public void nonRetryableExceptionClosesRetrySpan() { - SessionRetryContext retryContext = SessionRetryContext.create(queryClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - RuntimeException thrown; - try { - retryContext.supplyStatus(session -> { - throw new IllegalStateException("boom"); - }).join(); - throw new AssertionError("Exception expected"); - } catch (RuntimeException ex) { - thrown = ex; - } - - awaitTracing(); - Assert.assertNotNull(thrown.getCause()); - Assert.assertTrue(thrown.getCause() instanceof IllegalStateException); - Assert.assertEquals(1, - tracer.countClosedSpanWithErrorType("ydb.RunWithRetry", IllegalStateException.class.getName())); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); - Assert.assertEquals(1, tracer.countClosedSpan("ydb.Try")); - Assert.assertEquals(1, - tracer.countClosedSpanWithErrorType("ydb.Try", IllegalStateException.class.getName())); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); - Assert.assertEquals(0, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); - } - - @Test - public void retryableUnexpectedResultExceptionRetriesAndSetsErrorType() { - AtomicInteger attempt = new AtomicInteger(); - SessionRetryContext retryContext = SessionRetryContext.create(queryClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - Status status = retryContext.supplyStatus(session -> { - if (attempt.getAndIncrement() == 0) { - throw new UnexpectedResultException("retryable", Status.of(StatusCode.OVERLOADED)); - } - return session.createQuery("SELECT 1", TxMode.NONE).execute().thenApply(Result::getStatus); - }).join(); - status.expectSuccess(); - - awaitTracing(); - Assert.assertEquals(1, tracer.countClosedSpan("ydb.RunWithRetry")); - Assert.assertEquals(2, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); - Assert.assertEquals(2, tracer.countClosedSpan("ydb.Try")); - Assert.assertEquals(1, - tracer.countClosedSpanWithErrorType("ydb.Try", UnexpectedResultException.class.getName())); - Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); - } - - @Test - public void contextCancelClosesRunWithRetrySpan() throws InterruptedException { - SessionRetryContext retryContext = SessionRetryContext.create(queryClient) - .maxRetries(5) - .backoffSlot(Duration.ofMillis(1)) - .fastBackoffSlot(Duration.ofMillis(1)) - .build(); - - CountDownLatch firstPartReceived = new CountDownLatch(1); - CountDownLatch unblockReader = new CountDownLatch(1); - - CompletableFuture future = retryContext.supplyStatus(session -> - session.createQuery("SELECT 1; SELECT 2;", TxMode.NONE).execute(part -> { - firstPartReceived.countDown(); - try { - unblockReader.await(30, TimeUnit.SECONDS); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } - }).thenApply(Result::getStatus)); - - Assert.assertTrue(firstPartReceived.await(30, TimeUnit.SECONDS)); - future.cancel(false); - unblockReader.countDown(); - - awaitTracing(); - Assert.assertThrows(CancellationException.class, future::join); - Assert.assertTrue(future.isCancelled()); - Assert.assertEquals(1, - tracer.countClosedSpanWithErrorType("ydb.RunWithRetry", CancellationException.class.getName())); - } - private static final class RecordingTracer implements Tracer { - private final List spans = Collections.synchronizedList(new ArrayList<>()); - private final ThreadLocal currentSpan = new ThreadLocal<>(); - - private final Object spanLock = new Object(); - private int openSpans = 0; + private final List spans = Collections.synchronizedList(new ArrayList()); @Override public Span startSpan(String spanName, SpanKind spanKind) { - RecordingSpan span = new RecordingSpan(this, spanName, spanKind, currentSpan.get()); + RecordingSpan span = new RecordingSpan(spanName, spanKind); spans.add(span); return span; } - /** Called from {@link RecordingSpan} constructor: one span opened. */ - void recordSpanOpen() { - synchronized (spanLock) { - openSpans++; - } - } - - /** Called from {@link RecordingSpan#end()}: one span closed. */ - void recordSpanClose() { - synchronized (spanLock) { - openSpans--; - if (openSpans == 0) { - spanLock.notifyAll(); - } - } - } - void reset() { - synchronized (spanLock) { - spans.clear(); - openSpans = 0; - } - } - - /** Waits until every span that called {@link #recordSpanOpen} has ended ({@code openSpans == 0}). */ - void awaitAllSpansClosed(Duration timeout) throws InterruptedException { - long deadlineNanos = System.nanoTime() + timeout.toNanos(); - synchronized (spanLock) { - while (openSpans > 0) { - long remainingNanos = deadlineNanos - System.nanoTime(); - if (remainingNanos <= 0) { - Assert.fail("await timeout, openSpans=" + openSpans); - } - long waitMillis = TimeUnit.NANOSECONDS.toMillis(remainingNanos); - if (waitMillis == 0) { - waitMillis = 1; - } - spanLock.wait(waitMillis); - } - } + spans.clear(); } int countClosedSpan(String spanName) { int count = 0; synchronized (spans) { for (RecordingSpan span : spans) { - if (span.spanName().equals(spanName) && span.isClosed()) { - count++; - } - } - } - return count; - } - - int countClosedSpanWithParent(String spanName, String parentSpanName) { - int count = 0; - synchronized (spans) { - for (RecordingSpan span : spans) { - if (span.isClosed() - && span.spanName().equals(spanName) - && span.parentSpan() != null - && span.parentSpan().spanName().equals(parentSpanName)) { - count++; - } - } - } - return count; - } - - int countClosedSpanWithErrorType(String spanName, String errorType) { - int count = 0; - synchronized (spans) { - for (RecordingSpan span : spans) { - Throwable err = span.throwableError(); - if (span.isClosed() - && span.spanName().equals(spanName) - && err != null - && err.getClass().getName().equals(errorType)) { + if (span.name.equals(spanName) && span.closed) { count++; } } } return count; } - - int countClosedSpanWithAttribute(String spanName, String key) { - int count = 0; - synchronized (spans) { - for (RecordingSpan span : spans) { - if (span.isClosed() - && span.spanName().equals(spanName) - && span.hasLongAttribute(key)) { - count++; - } - } - } - return count; - } - - Scope makeSpanCurrent(RecordingSpan span) { - RecordingSpan previous = currentSpan.get(); - currentSpan.set(span); - return () -> { - if (previous == null) { - currentSpan.remove(); - } else { - currentSpan.set(previous); - } - }; - } } private static final class RecordingSpan implements Span { - private final RecordingTracer tracer; private final String name; private final SpanKind kind; - private final RecordingSpan parent; - private final ConcurrentMap longAttributes = new ConcurrentHashMap<>(); - private Throwable throwableError; private volatile boolean closed = false; - private final AtomicBoolean endedOnce = new AtomicBoolean(false); - RecordingSpan(RecordingTracer tracer, String name, SpanKind kind, RecordingSpan parent) { - this.tracer = tracer; + RecordingSpan(String name, SpanKind kind) { this.name = name; this.kind = kind; - this.parent = parent; - tracer.recordSpanOpen(); - } - - String spanName() { - return name; - } - - boolean isClosed() { - return closed; - } - - @Nullable - RecordingSpan parentSpan() { - return parent; - } - - @Nullable - Throwable throwableError() { - return throwableError; - } - - boolean hasLongAttribute(String key) { - return longAttributes.containsKey(key); } @Override @@ -592,37 +202,28 @@ public String getId() { } @Override - public boolean isValid() { - return true; - } - - @Override - public Scope makeCurrent() { - return tracer.makeSpanCurrent(this); + public void setAttribute(String key, String value) { + // not needed for this test } @Override - public Scope restoreContext() { - return parent != null ? parent.makeCurrent() : Span.NOOP.makeCurrent(); + public void setAttribute(String key, long value) { + // not needed for this test } @Override - public void setStatus(@Nullable Status status, @Nullable Throwable error) { - this.throwableError = error; + public void setError(Status status) { + // not needed for this test } @Override - public void setAttribute(String key, long value) { - longAttributes.put(key, value); + public void setError(Throwable error) { + // not needed for this test } @Override public void end() { - if (!endedOnce.compareAndSet(false, true)) { - return; - } this.closed = true; - tracer.recordSpanClose(); } } } diff --git a/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java b/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java index 596466acb..199d3ae73 100644 --- a/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java +++ b/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java @@ -6,7 +6,6 @@ import tech.ydb.core.Result; import tech.ydb.core.StatusCode; -import tech.ydb.core.tracing.Tracer; import tech.ydb.table.Session; import tech.ydb.table.SessionSupplier; import tech.ydb.table.rpc.TableRpc; @@ -39,11 +38,6 @@ public ScheduledExecutorService getScheduler() { return tableRpc.getScheduler(); } - @Override - public Tracer getTracer() { - return tableRpc.getTracer(); - } - public static Builder newClient(TableRpc rpc) { return new Builder(rpc); } From 912ba1d19839ea939312040d2abbf24b62848a58 Mon Sep 17 00:00:00 2001 From: Michael Kim Date: Wed, 6 May 2026 23:59:51 +0300 Subject: [PATCH 2/8] undo useless diff's --- .../ydb/core/tracing/OpenTelemetryTracer.java | 19 ++++--- .../main/java/tech/ydb/core/tracing/Span.java | 55 +++++++++++-------- .../java/tech/ydb/query/impl/SessionImpl.java | 24 ++++---- .../tech/ydb/query/impl/QueryTracingTest.java | 9 --- 4 files changed, 57 insertions(+), 50 deletions(-) diff --git a/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java b/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java index e8f13ba7e..4fc7025a8 100644 --- a/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java +++ b/core/src/main/java/tech/ydb/core/tracing/OpenTelemetryTracer.java @@ -94,6 +94,16 @@ public void setAttribute(String key, long value) { @Override public void setStatus(@Nullable Status status, @Nullable Throwable error) { + if (status != null) { + if (status.isSuccess()) { + span.setStatus(StatusCode.OK); + } else { + tech.ydb.core.StatusCode code = status.getCode(); + span.setAttribute("db.response.status_code", code.toString()); + span.setAttribute("error.type", code.isTransportError() ? "transport_error" : "ydb_error"); + span.setStatus(StatusCode.ERROR, status.toString()); + } + } if (error != null) { if (error instanceof UnexpectedResultException) { tech.ydb.core.StatusCode code = ((UnexpectedResultException) error).getStatus().getCode(); @@ -103,16 +113,7 @@ public void setStatus(@Nullable Status status, @Nullable Throwable error) { span.setAttribute("error.type", error.getClass().getName()); } span.setStatus(StatusCode.ERROR, error.getMessage()); - return; - } - if (status != null && !status.isSuccess()) { - tech.ydb.core.StatusCode code = status.getCode(); - span.setAttribute("db.response.status_code", code.toString()); - span.setAttribute("error.type", code.isTransportError() ? "transport_error" : "ydb_error"); - span.setStatus(StatusCode.ERROR, status.toString()); - return; } - span.setStatus(StatusCode.OK); } @Override diff --git a/core/src/main/java/tech/ydb/core/tracing/Span.java b/core/src/main/java/tech/ydb/core/tracing/Span.java index f4c63b60d..b482bc6cb 100644 --- a/core/src/main/java/tech/ydb/core/tracing/Span.java +++ b/core/src/main/java/tech/ydb/core/tracing/Span.java @@ -16,9 +16,20 @@ @ExperimentalApi("YDB Tracer is experimental and API may change without notice") public interface Span { Span NOOP = new Span() { - // No operations. }; + /** + * Returns W3C traceparent value for request propagation. + * + *

For {@link #NOOP} this returns an empty string. Check {@link #isValid()} to decide whether + * trace headers should be sent to server. + * + * @return traceparent value + */ + default String getId() { + return ""; + } + /** * Indicates whether this span carries a real tracing context. * @@ -28,6 +39,23 @@ default boolean isValid() { return false; } + /** + * Sets a string attribute on the span. + * + * @param key attribute key + * @param value attribute value, may be null + */ + default void setAttribute(String key, @Nullable String value) { + } + + /** + * Sets a long attribute on the span. + * + * @param key attribute key + * @param value attribute value + */ + default void setAttribute(String key, long value) { + } /** * Sets span status (success or error) with human-readable message. @@ -38,10 +66,6 @@ default boolean isValid() { default void setStatus(@Nullable Status status, @Nullable Throwable error) { } - default String getId() { - return ""; - } - /** * Makes this span current in the active execution context. * @@ -52,10 +76,6 @@ default Scope makeCurrent() { }; } - /** Sets a string attribute on the span (ignored by Noop implementation). */ - default void setAttribute(String key, String value) { - } - /** * Restores context captured when this span was created. * @@ -68,11 +88,10 @@ default Scope restoreContext() { }; } - /** Sets a long attribute on the span (ignored by Noop implementation). */ - default void setAttribute(String key, long value) { - } - - default void setError(Status status) { + /** + * Ends (finishes) this span. + */ + default void end() { } /** @@ -82,7 +101,6 @@ default void setError(Status status) { * @param span the span to finalize * @param future the future to observe * @return the same future (for chaining) - * Sets span status to error from exception. */ static CompletableFuture endOnStatus(Span span, CompletableFuture future) { return span.isValid() ? future.whenComplete((status, th) -> { @@ -91,9 +109,6 @@ static CompletableFuture endOnStatus(Span span, CompletableFuture CompletableFuture> endOnResult(Span span, CompletableFuture span.end(); }) : future; } - - default void end() { - } } - diff --git a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java index 4ba4308c4..804ac388c 100644 --- a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java @@ -149,10 +149,10 @@ GrpcReadStream attach(AttachSessionSettings settings) { Context ctx = Context.ROOT.fork(); Context previous = ctx.attach(); try { - AtomicBoolean pessimize = new AtomicBoolean(false); + AtomicBoolean pessimizationHook = new AtomicBoolean(false); GrpcRequestSettings grpcSettings = makeOptions(settings) .disableDeadline() - .withPessimizationHook(pessimize::get) + .withPessimizationHook(pessimizationHook::get) .build(); GrpcReadStream origin = rpc.attachSession(request, grpcSettings); return new GrpcReadStream() { @@ -163,17 +163,21 @@ public CompletableFuture start(GrpcReadStream.Observer observer) String msg = TextFormat.shortDebugString(message); logger.trace("session '{}' got attach stream message {}", sessionId, msg); } - if (message.hasNodeShutdown() || message.hasSessionShutdown()) { - if (message.hasNodeShutdown() && nodeID != 0) { - pessimize.set(true); - } - updateSessionState(Status.of(StatusCode.BAD_SESSION)); - observer.onNext(Status.of(StatusCode.BAD_SESSION)); - return; - } + StatusCode code = StatusCode.fromProto(message.getStatus()); Status status = Status.of(code, Issue.fromPb(message.getIssuesList())); updateSessionState(status); + switch (message.getSessionHintCase()) { + case NODE_SHUTDOWN: + pessimizationHook.set(nodeID != 0); + updateSessionState(Status.of(StatusCode.BAD_SESSION)); + break; + case SESSION_SHUTDOWN: + updateSessionState(Status.of(StatusCode.BAD_SESSION)); + break; + default: + break; + } observer.onNext(status); }); } diff --git a/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java b/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java index 34f5f1893..b68eda3ec 100644 --- a/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java +++ b/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java @@ -211,15 +211,6 @@ public void setAttribute(String key, long value) { // not needed for this test } - @Override - public void setError(Status status) { - // not needed for this test - } - - @Override - public void setError(Throwable error) { - // not needed for this test - } @Override public void end() { From 9e616032c5858017d458ae4cb849f8e1f2f79bdc Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Thu, 7 May 2026 13:09:39 +0300 Subject: [PATCH 3/8] feat: complete OpenTelemetry metrics implementation Drop the standalone opentelemetry/ module and the redundant SpanFinalizer helper, revert unrelated regressions in TableClientImpl/SimpleTableClient and the QueryTracingTest test reduction. Move OpenTelemetryMeter alongside OpenTelemetryTracer in core (using the optional opentelemetry-api dep) and relocate the integration test to query/test. Implements the agreed-on metric set: ydb.client.operation.{duration,failed}, ydb.query.session.{create_time,pending_requests,timeouts,count,min,max} with operation.name, db.response.status_code, ydb.query.session.{pool.name,state} attributes. Pool name comes from QueryClient.Builder#sessionPoolName, default is the connection's database. Retry-policy metrics are intentionally out of scope for this iteration. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../java/tech/ydb/core/metrics/Meter.java | 10 +- .../java/tech/ydb/core/metrics/NoopMeter.java | 14 +- .../ydb/core/metrics/OpenTelemetryMeter.java | 159 +++++++ .../ydb/core/metrics/SessionPoolObserver.java | 6 +- .../tech/ydb/core/tracing/SpanFinalizer.java | 46 -- opentelemetry/pom.xml | 60 --- .../ydb/opentelemetry/OpenTelemetryMeter.java | 99 ---- .../opentelemetry/OpenTelemetryTracer.java | 93 ---- .../main/java/tech/ydb/query/QueryClient.java | 2 + .../tech/ydb/query/impl/QueryClientImpl.java | 14 +- .../java/tech/ydb/query/impl/SessionImpl.java | 192 ++++---- .../java/tech/ydb/query/impl/SessionPool.java | 80 ++-- .../tech/ydb/query/impl/TableClientImpl.java | 18 +- .../tech/ydb/query/impl/QueryTracingTest.java | 434 +++++++++++++++++- ...nTelemetryQueryMetricsIntegrationTest.java | 87 +++- .../ydb/table/impl/SimpleTableClient.java | 6 + 16 files changed, 839 insertions(+), 481 deletions(-) create mode 100644 core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java delete mode 100644 core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java delete mode 100644 opentelemetry/pom.xml delete mode 100644 opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java delete mode 100644 opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java rename opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java => query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java (62%) diff --git a/core/src/main/java/tech/ydb/core/metrics/Meter.java b/core/src/main/java/tech/ydb/core/metrics/Meter.java index c94913257..a8856351c 100644 --- a/core/src/main/java/tech/ydb/core/metrics/Meter.java +++ b/core/src/main/java/tech/ydb/core/metrics/Meter.java @@ -3,7 +3,15 @@ import tech.ydb.core.Status; public interface Meter { - void recordOperation(String name, long durationNanos, Status status); + void recordOperationDuration(String operationName, long durationNanos); + + void recordOperationFailed(String operationName, Status status); + void registerSessionPool(String poolName, SessionPoolObserver observer); + void recordSessionCreateTime(String poolName, long durationNanos); + + void incrementSessionPendingRequests(String poolName); + + void incrementSessionTimeouts(String poolName); } diff --git a/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java b/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java index cd126efd5..6ca3d8c4a 100644 --- a/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java +++ b/core/src/main/java/tech/ydb/core/metrics/NoopMeter.java @@ -6,7 +6,6 @@ public final class NoopMeter implements Meter { public static final NoopMeter INSTANCE = new NoopMeter(); private NoopMeter() { - // No operations. } public static NoopMeter getInstance() { @@ -14,17 +13,26 @@ public static NoopMeter getInstance() { } @Override - public void recordOperation(String name, long durationNanos, Status status) { + public void recordOperationDuration(String operationName, long durationNanos) { + } + @Override + public void recordOperationFailed(String operationName, Status status) { } @Override public void registerSessionPool(String poolName, SessionPoolObserver observer) { - } @Override public void recordSessionCreateTime(String poolName, long durationNanos) { + } + @Override + public void incrementSessionPendingRequests(String poolName) { + } + + @Override + public void incrementSessionTimeouts(String poolName) { } } diff --git a/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java b/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java new file mode 100644 index 000000000..916aeaf98 --- /dev/null +++ b/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java @@ -0,0 +1,159 @@ +package tech.ydb.core.metrics; + +import java.util.Objects; + +import io.grpc.ExperimentalApi; +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; + +import tech.ydb.core.Status; + +@ExperimentalApi("YDB Meter is experimental and API may change without notice") +public final class OpenTelemetryMeter implements Meter { + private static final String DEFAULT_SCOPE = "tech.ydb.sdk"; + + private static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("operation.name"); + private static final AttributeKey STATUS_CODE = AttributeKey.stringKey("db.response.status_code"); + private static final AttributeKey POOL_NAME = AttributeKey.stringKey("ydb.query.session.pool.name"); + private static final AttributeKey SESSION_STATE = AttributeKey.stringKey("ydb.query.session.state"); + + private final io.opentelemetry.api.metrics.Meter meter; + private final Attributes baseAttributes; + + private final DoubleHistogram operationDuration; + private final LongCounter operationFailed; + private final DoubleHistogram sessionCreateTime; + private final LongCounter sessionPendingRequests; + private final LongCounter sessionTimeouts; + + private OpenTelemetryMeter(io.opentelemetry.api.metrics.Meter meter, + String database, String host, int port) { + this.meter = Objects.requireNonNull(meter, "meter is null"); + + this.baseAttributes = Attributes.of( + AttributeKey.stringKey("db.system.name"), "ydb", + AttributeKey.stringKey("db.namespace"), database, + AttributeKey.stringKey("server.address"), host, + AttributeKey.longKey("server.port"), (long) port + ); + + this.operationDuration = meter.histogramBuilder("ydb.client.operation.duration") + .setUnit("s") + .setDescription("Duration of a single client operation attempt (ExecuteQuery, Commit, Rollback).") + .build(); + + this.operationFailed = meter.counterBuilder("ydb.client.operation.failed") + .setUnit("{operation}") + .setDescription("Number of failed client operation attempts.") + .build(); + + this.sessionCreateTime = meter.histogramBuilder("ydb.query.session.create_time") + .setUnit("s") + .setDescription("Time spent creating a new session.") + .build(); + + this.sessionPendingRequests = meter.counterBuilder("ydb.query.session.pending_requests") + .setUnit("{request}") + .setDescription("Number of session-acquire requests that had to wait for a free session.") + .build(); + + this.sessionTimeouts = meter.counterBuilder("ydb.query.session.timeouts") + .setUnit("{timeout}") + .setDescription("Number of session-acquire timeouts.") + .build(); + } + + public static OpenTelemetryMeter fromOpenTelemetry(OpenTelemetry openTelemetry, + String database, String host, int port) { + Objects.requireNonNull(openTelemetry, "openTelemetry is null"); + return new OpenTelemetryMeter(openTelemetry.getMeter(DEFAULT_SCOPE), database, host, port); + } + + public static OpenTelemetryMeter createGlobal(String database, String host, int port) { + return fromOpenTelemetry(GlobalOpenTelemetry.get(), database, host, port); + } + + @Override + public void recordOperationDuration(String operationName, long durationNanos) { + operationDuration.record(toSeconds(durationNanos), withOperation(operationName)); + } + + @Override + public void recordOperationFailed(String operationName, Status status) { + if (status == null || status.isSuccess()) { + return; + } + Attributes attrs = baseAttributes.toBuilder() + .put(OPERATION_NAME, operationName) + .put(STATUS_CODE, status.getCode().toString()) + .build(); + operationFailed.add(1L, attrs); + } + + @Override + public void registerSessionPool(String poolName, SessionPoolObserver observer) { + Attributes idle = baseAttributes.toBuilder() + .put(POOL_NAME, poolName) + .put(SESSION_STATE, "idle") + .build(); + Attributes used = baseAttributes.toBuilder() + .put(POOL_NAME, poolName) + .put(SESSION_STATE, "used") + .build(); + Attributes pool = baseAttributes.toBuilder() + .put(POOL_NAME, poolName) + .build(); + + meter.gaugeBuilder("ydb.query.session.count") + .ofLongs() + .setUnit("{session}") + .setDescription("Current number of sessions in the pool by state.") + .buildWithCallback(measurement -> { + measurement.record(observer.getIdleCount(), idle); + measurement.record(observer.getUsedCount(), used); + }); + + meter.gaugeBuilder("ydb.query.session.min") + .ofLongs() + .setUnit("{session}") + .setDescription("Configured minimum size of the session pool.") + .buildWithCallback(measurement -> measurement.record(observer.getMinSize(), pool)); + + meter.gaugeBuilder("ydb.query.session.max") + .ofLongs() + .setUnit("{session}") + .setDescription("Configured maximum size of the session pool.") + .buildWithCallback(measurement -> measurement.record(observer.getMaxSize(), pool)); + } + + @Override + public void recordSessionCreateTime(String poolName, long durationNanos) { + sessionCreateTime.record(toSeconds(durationNanos), withPool(poolName)); + } + + @Override + public void incrementSessionPendingRequests(String poolName) { + sessionPendingRequests.add(1L, withPool(poolName)); + } + + @Override + public void incrementSessionTimeouts(String poolName) { + sessionTimeouts.add(1L, withPool(poolName)); + } + + private Attributes withOperation(String operationName) { + return baseAttributes.toBuilder().put(OPERATION_NAME, operationName).build(); + } + + private Attributes withPool(String poolName) { + return baseAttributes.toBuilder().put(POOL_NAME, poolName).build(); + } + + private static double toSeconds(long durationNanos) { + return durationNanos / 1_000_000_000.0; + } +} diff --git a/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java b/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java index 4b872c6f5..9ae5f0d14 100644 --- a/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java +++ b/core/src/main/java/tech/ydb/core/metrics/SessionPoolObserver.java @@ -1,7 +1,11 @@ package tech.ydb.core.metrics; public interface SessionPoolObserver { + int getMinSize(); + + int getMaxSize(); + int getIdleCount(); + int getUsedCount(); - int getPendingCount(); } diff --git a/core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java b/core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java deleted file mode 100644 index 25db26cd2..000000000 --- a/core/src/main/java/tech/ydb/core/tracing/SpanFinalizer.java +++ /dev/null @@ -1,46 +0,0 @@ -package tech.ydb.core.tracing; - -import java.util.function.BiConsumer; - -import tech.ydb.core.Status; -import tech.ydb.core.utils.FutureTools; - -/** - * Shared helpers to finish spans for status/exception outcomes. - */ -public final class SpanFinalizer { - private SpanFinalizer() { - } - - public static void finishByStatus(Span span, Status status) { - if (span == null) { - return; - } - - span.setStatus(status, null); - span.end(); - } - - public static void finishByError(Span span, Throwable error) { - if (span == null) { - return; - } - - span.setStatus(null, error != null ? FutureTools.unwrapCompletionException(error) : null); - span.end(); - } - - public static BiConsumer whenComplete(Span span) { - return (status, error) -> { - if (span == null) { - return; - } - - if (error != null) { - finishByError(span, error); - return; - } - finishByStatus(span, status); - }; - } -} diff --git a/opentelemetry/pom.xml b/opentelemetry/pom.xml deleted file mode 100644 index 9f5647393..000000000 --- a/opentelemetry/pom.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - 4.0.0 - - - tech.ydb - ydb-sdk-parent - 2.4.1-SNAPSHOT - - - ydb-sdk-opentelemetry - OpenTelemetry integration for YDB Java SDK - OpenTelemetry integration module for tracing and metrics in YDB SDK - - - 1.59.0 - - - - - tech.ydb - ydb-sdk-core - - - io.opentelemetry - opentelemetry-api - ${opentelemetry.version} - - - io.opentelemetry - opentelemetry-sdk - 1.59.0 - test - - - junit - junit - test - - - tech.ydb - ydb-sdk-query - test - - - tech.ydb.test - ydb-junit4-support - test - - - io.opentelemetry - opentelemetry-sdk-testing - 1.59.0 - test - - - diff --git a/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java b/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java deleted file mode 100644 index 63867688b..000000000 --- a/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryMeter.java +++ /dev/null @@ -1,99 +0,0 @@ -package tech.ydb.opentelemetry; - -import java.util.Objects; - -import io.opentelemetry.api.GlobalOpenTelemetry; -import io.opentelemetry.api.OpenTelemetry; -import io.opentelemetry.api.common.AttributeKey; -import io.opentelemetry.api.common.Attributes; -import io.opentelemetry.api.metrics.DoubleHistogram; -import io.opentelemetry.api.metrics.LongCounter; - -import tech.ydb.core.Status; -import tech.ydb.core.metrics.Meter; -import tech.ydb.core.metrics.SessionPoolObserver; - -public final class OpenTelemetryMeter implements Meter { - private final io.opentelemetry.api.metrics.Meter meter; - - private final DoubleHistogram operationDuration; // db.client.operation.duration - private final LongCounter operationFailed; // ydb.client.operation.failed - private final DoubleHistogram sessionCreateTime; // ydb.query.session.create_time - - private final Attributes baseAttributes; // db.system.name, db.namespace, server.address, server.port - - private OpenTelemetryMeter(io.opentelemetry.api.metrics.Meter meter, - String database, String host, int port) { - this.meter = Objects.requireNonNull(meter, "meter is null"); - this.operationDuration = meter.histogramBuilder("db.client.operation.duration") - .setUnit("s") - .build(); - this.operationFailed = meter.counterBuilder("ydb.client.operation.failed") - .setUnit("{command}") - .build(); - this.sessionCreateTime = meter.histogramBuilder("ydb.query.session.create_time") - .setUnit("s") - .build(); - - this.baseAttributes = Attributes.of( - AttributeKey.stringKey("db.system.name"), "ydb", - AttributeKey.stringKey("db.namespace"), database, - AttributeKey.stringKey("server.address"), host, - AttributeKey.longKey("server.port"), (long) port - ); - } - - public static OpenTelemetryMeter fromOpenTelemetry(OpenTelemetry openTelemetry, - String database, String host, int port) { - io.opentelemetry.api.metrics.Meter meter = openTelemetry - .getMeter("tech.ydb.sdk"); - - return new OpenTelemetryMeter(meter, database, host, port); - } - - public static OpenTelemetryMeter createGlobal(String database, String host, int port) { - return fromOpenTelemetry(GlobalOpenTelemetry.get(), database, host, port); - } - - @Override - public void recordOperation(String name, long durationNanos, Status status) { - Attributes attrs = baseAttributes.toBuilder() - .put("ydb.operation.name", name) - .build(); - - operationDuration.record(durationNanos / 1_000_000_000.0, attrs); - - if (status != null && !status.isSuccess()) { - Attributes errAttrs = attrs.toBuilder() - .put("db.response.status_code", status.getCode().toString()) - .build(); - operationFailed.add(1, errAttrs); - } - } - - @Override - public void registerSessionPool(String poolName, SessionPoolObserver observer) { - meter.upDownCounterBuilder("ydb.query.session.count") - .setUnit("{connection}") - .buildWithCallback(measurement -> { - Attributes idle = Attributes.of( - AttributeKey.stringKey("ydb.query.session.pool.name"), poolName, - AttributeKey.stringKey("ydb.query.session.state"), "idle" - ); - Attributes used = Attributes.of( - AttributeKey.stringKey("ydb.query.session.pool.name"), poolName, - AttributeKey.stringKey("ydb.query.session.state"), "used" - ); - measurement.record(observer.getIdleCount(), idle); - measurement.record(observer.getUsedCount(), used); - }); - } - - @Override - public void recordSessionCreateTime(String poolName, long durationNanos) { - Attributes attrs = Attributes.of( - AttributeKey.stringKey("ydb.query.session.pool.name"), poolName - ); - sessionCreateTime.record(durationNanos / 1_000_000_000.0, attrs); - } -} diff --git a/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java b/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java deleted file mode 100644 index 27665b3df..000000000 --- a/opentelemetry/src/main/java/tech/ydb/opentelemetry/OpenTelemetryTracer.java +++ /dev/null @@ -1,93 +0,0 @@ -package tech.ydb.opentelemetry; - -import java.util.Objects; - -import io.opentelemetry.api.GlobalOpenTelemetry; -import io.opentelemetry.api.OpenTelemetry; -import io.opentelemetry.api.trace.StatusCode; - -import tech.ydb.core.Status; -import tech.ydb.core.tracing.Span; -import tech.ydb.core.tracing.SpanKind; -import tech.ydb.core.tracing.Tracer; - -public final class OpenTelemetryTracer implements Tracer { - private static final String DEFAULT_SCOPE = "tech.ydb.sdk"; - - private final io.opentelemetry.api.trace.Tracer tracer; - - private OpenTelemetryTracer(io.opentelemetry.api.trace.Tracer tracer) { - this.tracer = Objects.requireNonNull(tracer, "tracer is null"); - } - - public static OpenTelemetryTracer createGlobal() { - return fromOpenTelemetry(GlobalOpenTelemetry.get()); - } - - public static OpenTelemetryTracer fromOpenTelemetry(OpenTelemetry openTelemetry) { - return fromOpenTelemetry(openTelemetry, DEFAULT_SCOPE); - } - - public static OpenTelemetryTracer fromOpenTelemetry(OpenTelemetry openTelemetry, String scopeName) { - Objects.requireNonNull(openTelemetry, "openTelemetry is null"); - Objects.requireNonNull(scopeName, "scopeName is null"); - return new OpenTelemetryTracer(openTelemetry.getTracer(scopeName)); - } - - @Override - public Span startSpan(String spanName, SpanKind spanKind) { - io.opentelemetry.api.trace.Span span = tracer.spanBuilder(spanName) - .setSpanKind(mapSpanKind(spanKind)) - .startSpan(); - return new OtelSpan(span); - } - - private static io.opentelemetry.api.trace.SpanKind mapSpanKind(SpanKind kind) { - if (kind == SpanKind.CLIENT) { - return io.opentelemetry.api.trace.SpanKind.CLIENT; - } - return io.opentelemetry.api.trace.SpanKind.INTERNAL; - } - - private static final class OtelSpan implements Span { - private final io.opentelemetry.api.trace.Span span; - - private OtelSpan(io.opentelemetry.api.trace.Span span) { - this.span = span; - } - - @Override - public String getId() { - return "00-" + span.getSpanContext().getTraceId() + "-" + span.getSpanContext().getSpanId() + "01"; - } - - @Override - public void setAttribute(String key, String value) { - span.setAttribute(key, value); - } - - @Override - public void setAttribute(String key, long value) { - span.setAttribute(key, value); - } - - @Override - public void setError(Status status) { - span.setAttribute("db.response.status_code", status.getCode().toString()); - span.setAttribute("error.type", status.getCode().isTransportError() ? "transport_error" : "ydb_error"); - - span.setStatus(StatusCode.ERROR, status.toString()); - } - - @Override - public void setError(Throwable error) { - span.setAttribute("error.type", error.getClass().getName()); - span.setStatus(StatusCode.ERROR, error.getMessage()); - } - - @Override - public void end() { - span.end(); - } - } -} diff --git a/query/src/main/java/tech/ydb/query/QueryClient.java b/query/src/main/java/tech/ydb/query/QueryClient.java index 343ccb01f..7a2177aa2 100644 --- a/query/src/main/java/tech/ydb/query/QueryClient.java +++ b/query/src/main/java/tech/ydb/query/QueryClient.java @@ -55,6 +55,8 @@ interface Builder { Builder sessionMaxIdleTime(Duration duration); + Builder sessionPoolName(String poolName); + QueryClient build(); } } diff --git a/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java b/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java index 4716abc85..1c266e0ef 100644 --- a/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java @@ -24,13 +24,16 @@ public class QueryClientImpl implements QueryClient { private final Tracer tracer; public QueryClientImpl(Builder builder) { + String poolName = builder.sessionPoolName != null + ? builder.sessionPoolName : builder.transport.getDatabase(); this.pool = new SessionPool( Clock.systemUTC(), new QueryServiceRpc(builder.transport), builder.transport.getScheduler(), builder.sessionPoolMinSize, builder.sessionPoolMaxSize, - builder.sessionPoolIdleDuration + builder.sessionPoolIdleDuration, + poolName ); this.scheduler = builder.transport.getScheduler(); this.tracer = builder.transport.getTracer(); @@ -77,6 +80,7 @@ public static class Builder implements QueryClient.Builder { private int sessionPoolMinSize = 0; private int sessionPoolMaxSize = 50; private Duration sessionPoolIdleDuration = Duration.ofMinutes(5); + private String sessionPoolName = null; Builder(GrpcTransport transport) { Preconditions.checkArgument(transport != null, "transport is null"); @@ -126,6 +130,14 @@ public Builder sessionMaxIdleTime(Duration duration) { return this; } + @Override + public Builder sessionPoolName(String poolName) { + Preconditions.checkArgument(poolName != null && !poolName.isEmpty(), + "sessionPoolName must be a non-empty string"); + this.sessionPoolName = poolName; + return this; + } + @Override public QueryClientImpl build() { return new QueryClientImpl(this); diff --git a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java index 804ac388c..3b62210ea 100644 --- a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java @@ -27,7 +27,6 @@ import tech.ydb.core.operation.StatusExtractor; import tech.ydb.core.settings.BaseRequestSettings; import tech.ydb.core.tracing.Span; -import tech.ydb.core.tracing.SpanFinalizer; import tech.ydb.core.utils.URITools; import tech.ydb.core.utils.UpdatableOptional; import tech.ydb.proto.ValueProtos; @@ -145,14 +144,15 @@ GrpcReadStream attach(AttachSessionSettings settings) { YdbQuery.AttachSessionRequest request = YdbQuery.AttachSessionRequest.newBuilder() .setSessionId(sessionId) .build(); - // Execute attachSession call outside current context to avoid cancellation and deadline propogation + // Execute attachSession call outside current context to avoid cancellation and deadline propagation Context ctx = Context.ROOT.fork(); Context previous = ctx.attach(); try { AtomicBoolean pessimizationHook = new AtomicBoolean(false); + GrpcRequestSettings grpcSettings = makeOptions(settings) - .disableDeadline() .withPessimizationHook(pessimizationHook::get) + .disableDeadline() .build(); GrpcReadStream origin = rpc.attachSession(request, grpcSettings); return new GrpcReadStream() { @@ -163,10 +163,10 @@ public CompletableFuture start(GrpcReadStream.Observer observer) String msg = TextFormat.shortDebugString(message); logger.trace("session '{}' got attach stream message {}", sessionId, msg); } - StatusCode code = StatusCode.fromProto(message.getStatus()); Status status = Status.of(code, Issue.fromPb(message.getIssuesList())); updateSessionState(status); + // The hint is sent by the server with a success status. switch (message.getSessionHintCase()) { case NODE_SHUTDOWN: pessimizationHook.set(nodeID != 0); @@ -178,6 +178,7 @@ public CompletableFuture start(GrpcReadStream.Observer observer) default: break; } + observer.onNext(status); }); } @@ -329,10 +330,10 @@ GrpcReadStream createGrpcStream( @Override public QueryStream createQuery(String query, TxMode tx, Params prms, ExecuteQuerySettings settings) { YdbQuery.TransactionControl tc = TxControl.txModeCtrl(tx, true); - Span span = rpc.startSpan("ydb.ExecuteQuery"); + Span span = startSpan("ydb.ExecuteQuery"); long startNanos = System.nanoTime(); - - return new StreamImpl(createGrpcStream(query, tc, prms, settings, span), span, startNanos, "ydb.ExecuteQuery") { + return new StreamImpl(createGrpcStream(query, tc, prms, settings, span), span, + startNanos, "ExecuteQuery") { @Override void handleTxMeta(String txID) { if (txID != null && !txID.isEmpty()) { @@ -372,27 +373,20 @@ static CompletableFuture> createSession( return rpc.createSession(request, grpcSettingsBuilder.build()).thenApply(result -> { pessimizationHook.set(result.getStatus().getCode() == StatusCode.OVERLOADED); - if (!result.isSuccess()) { - SpanFinalizer.finishByStatus(createSpan, result.getStatus()); - } return CREATE_SESSION.apply(result); }); } abstract class StreamImpl implements QueryStream { private final GrpcReadStream grpcStream; - @Nullable - private final Span operationSpan; + private final Span span; private final long startNanos; private final String operationName; - StreamImpl( - GrpcReadStream grpcStream, - @Nullable - Span operationSpan, long startNanos, String operationName - ) { + StreamImpl(GrpcReadStream grpcStream, Span operationSpan, + long startNanos, String operationName) { this.grpcStream = grpcStream; - this.operationSpan = operationSpan; + this.span = operationSpan; this.startNanos = startNanos; this.operationName = operationName; } @@ -406,10 +400,10 @@ void handleCompletion(Status status, Throwable th) { public CompletableFuture> execute(PartsHandler handler) { final UpdatableOptional operationStatus = new UpdatableOptional<>(); final UpdatableOptional stats = new UpdatableOptional<>(); - return grpcStream.start(msg -> { + return Span.endOnResult(span, grpcStream.start(msg -> { if (isTraceEnabled) { - logger.trace("{} got stream message {}", SessionImpl.this, - TextFormat.shortDebugString(msg)); + logger.trace("{} got stream message {}", + SessionImpl.this, TextFormat.shortDebugString(msg)); } Issue[] issues = Issue.fromPb(msg.getIssuesList()); Status status = Status.of(StatusCode.fromProto(msg.getStatus()), issues); @@ -444,22 +438,18 @@ public CompletableFuture> execute(PartsHandler handler) { logger.trace("{} lost result set part with index {}", SessionImpl.this, index); } } - }) - .whenComplete(this::handleCompletion) - .thenApply(streamStatus -> { + }).whenComplete(this::handleCompletion).thenApply(streamStatus -> { updateSessionState(streamStatus); Status status = operationStatus.orElse(streamStatus); long elapsed = System.nanoTime() - startNanos; - rpc.getMeter().recordOperation(operationName, elapsed, status); - Result result; - if (status.isSuccess()) { - result = Result.success(new QueryInfo(stats.get()), streamStatus); - } else { - result = Result.fail(status); + rpc.getMeter().recordOperationDuration(operationName, elapsed); + if (!status.isSuccess()) { + rpc.getMeter().recordOperationFailed(operationName, status); + return Result.fail(status); } - SpanFinalizer.finishByStatus(operationSpan, status); - return result; - }); + return Result.success(new QueryInfo(stats.get()), streamStatus); + }) + ); } @Override @@ -496,11 +486,10 @@ public QueryStream createQuery(String query, boolean commitAtEnd, Params prms, E ? TxControl.txIdCtrl(currentId, commitAtEnd) : TxControl.txModeCtrl(txMode, commitAtEnd); - Span span = rpc.startSpan("ydb.ExecuteQuery"); + Span span = startSpan("ydb.ExecuteQuery"); long startNanos = System.nanoTime(); - - return new StreamImpl( - createGrpcStream(query, tc, prms, settings, span), span, startNanos, "ydb.ExecuteQuery") { + return new StreamImpl(createGrpcStream(query, tc, prms, settings, span), span, + startNanos, "ExecuteQuery") { @Override void handleTxMeta(String txID) { String newId = txID == null || txID.isEmpty() ? null : txID; @@ -523,7 +512,7 @@ void handleCompletion(Status status, Throwable th) { currentStatusFuture.complete(Status .of(StatusCode.ABORTED) .withIssues(Issue.of("Query on transaction failed with status " - + status, Issue.Severity.ERROR))); + + status, Issue.Severity.ERROR))); } } @@ -539,16 +528,16 @@ public void cancel() { @Override public CompletableFuture> commit(CommitTransactionSettings settings) { - final Span commitSpan = rpc.startSpan("ydb.Commit"); - final long startNanos = System.nanoTime(); - + Span span = startSpan("ydb.Commit"); + long startNanos = System.nanoTime(); CompletableFuture currentStatusFuture = statusFuture.getAndSet(new CompletableFuture<>()); - final String transactionId = txId.get(); + String transactionId = txId.get(); if (transactionId == null) { Issue issue = Issue.of("Transaction is not started", Issue.Severity.WARNING); Result res = Result.success(new QueryInfo(null), Status.of(StatusCode.SUCCESS, issue)); - SpanFinalizer.finishByStatus(commitSpan, res.getStatus()); - return CompletableFuture.completedFuture(res); + return recordCommitMetrics( + Span.endOnResult(span, CompletableFuture.completedFuture(res)), + startNanos); } YdbQuery.CommitTransactionRequest request = YdbQuery.CommitTransactionRequest.newBuilder() @@ -556,74 +545,83 @@ public CompletableFuture> commit(CommitTransactionSettings set .setTxId(transactionId) .build(); - return rpc.commitTransaction(request, makeOptions(settings, commitSpan).build()) - .thenApply(res -> { - Status status = res.getStatus(); - currentStatusFuture.complete(status); - updateSessionState(status); - if (!txId.compareAndSet(transactionId, null)) { - logger.warn("{} lost commit response for transaction {}", SessionImpl.this, transactionId); - } - // TODO: CommitTransactionResponse must contain exec_stats - return res.map(resp -> new QueryInfo(null)); - }).whenComplete(((status, th) -> { - if (th != null) { - currentStatusFuture.completeExceptionally( - new RuntimeException("Transaction commit failed with exception", th)); - SpanFinalizer.finishByError(commitSpan, th); - return; - } - SpanFinalizer.finishByStatus(commitSpan, status.getStatus()); - })).whenComplete((status, th) -> { // добавить - long elapsed = System.nanoTime() - startNanos; - Status finalStatus = status != null - ? status.getStatus() : Status.of(StatusCode.CLIENT_INTERNAL_ERROR); - rpc.getMeter().recordOperation("ydb.Commit", elapsed, finalStatus); - }); + return recordCommitMetrics( + Span.endOnResult(span, rpc.commitTransaction(request, makeOptions(settings, span).build())) + .thenApply(res -> { + Status status = res.getStatus(); + currentStatusFuture.complete(status); + updateSessionState(status); + if (!txId.compareAndSet(transactionId, null)) { + logger.warn("{} lost commit response for transaction {}", + SessionImpl.this, transactionId); + } + // TODO: CommitTransactionResponse must contain exec_stats + return res.map(resp -> new QueryInfo(null)); + }).whenComplete(((status, th) -> { + if (th != null) { + currentStatusFuture.completeExceptionally( + new RuntimeException("Transaction commit failed with exception", th)); + } + })), + startNanos); + } + + private CompletableFuture> recordCommitMetrics( + CompletableFuture> future, long startNanos) { + return future.whenComplete((res, th) -> { + long elapsed = System.nanoTime() - startNanos; + rpc.getMeter().recordOperationDuration("Commit", elapsed); + if (res != null && !res.getStatus().isSuccess()) { + rpc.getMeter().recordOperationFailed("Commit", res.getStatus()); + } + }); } @Override public CompletableFuture rollback(RollbackTransactionSettings settings) { - final Span rollbackSpan = rpc.startSpan("ydb.Rollback"); - final long startNanos = System.nanoTime(); - + Span span = startSpan("ydb.Rollback"); + long startNanos = System.nanoTime(); CompletableFuture currentStatusFuture = statusFuture.getAndSet(new CompletableFuture<>()); - final String transactionId = txId.get(); + String transactionId = txId.get(); if (transactionId == null) { Issue issue = Issue.of("Transaction is not started", Issue.Severity.WARNING); Status status = Status.of(StatusCode.SUCCESS, issue); - SpanFinalizer.finishByStatus(rollbackSpan, status); - return CompletableFuture.completedFuture(status); + return recordRollbackMetrics( + Span.endOnStatus(span, CompletableFuture.completedFuture(status)), + startNanos); } YdbQuery.RollbackTransactionRequest request = YdbQuery.RollbackTransactionRequest.newBuilder() .setSessionId(sessionId) .setTxId(transactionId) .build(); - return rpc.rollbackTransaction(request, makeOptions(settings, rollbackSpan).build()) - .thenApply(result -> { - updateSessionState(result.getStatus()); - if (!txId.compareAndSet(transactionId, null)) { - logger.warn("{} lost rollback response for transaction {}", SessionImpl.this, - transactionId); - } - return result.getStatus(); - }) - .whenComplete((status, th) -> { - currentStatusFuture.complete(Status - .of(StatusCode.ABORTED) - .withIssues(Issue.of("Transaction was rolled back", Issue.Severity.ERROR))); - if (th != null) { - SpanFinalizer.finishByError(rollbackSpan, th); - return; - } - SpanFinalizer.finishByStatus(rollbackSpan, status); - }).whenComplete((status, th) -> { - long elapsed = System.nanoTime() - startNanos; - Status finalStatus = status != null ? status : Status.of(StatusCode.CLIENT_INTERNAL_ERROR); - rpc.getMeter().recordOperation("ydb.Rollback", elapsed, finalStatus); - }); + return recordRollbackMetrics( + Span.endOnResult(span, rpc.rollbackTransaction(request, makeOptions(settings, span).build())) + .thenApply(result -> { + updateSessionState(result.getStatus()); + if (!txId.compareAndSet(transactionId, null)) { + logger.warn("{} lost rollback response for transaction {}", SessionImpl.this, + transactionId); + } + return result.getStatus(); + }) + .whenComplete((status, th) -> { + currentStatusFuture.complete(Status + .of(StatusCode.ABORTED) + .withIssues(Issue.of("Transaction was rolled back", Issue.Severity.ERROR))); + }), + startNanos); + } + + private CompletableFuture recordRollbackMetrics(CompletableFuture future, long startNanos) { + return future.whenComplete((status, th) -> { + long elapsed = System.nanoTime() - startNanos; + rpc.getMeter().recordOperationDuration("Rollback", elapsed); + if (status != null && !status.isSuccess()) { + rpc.getMeter().recordOperationFailed("Rollback", status); + } + }); } } } diff --git a/query/src/main/java/tech/ydb/query/impl/SessionPool.java b/query/src/main/java/tech/ydb/query/impl/SessionPool.java index 107cdc002..e4bbbcc7f 100644 --- a/query/src/main/java/tech/ydb/query/impl/SessionPool.java +++ b/query/src/main/java/tech/ydb/query/impl/SessionPool.java @@ -22,9 +22,9 @@ import tech.ydb.core.StatusCode; import tech.ydb.core.UnexpectedResultException; import tech.ydb.core.grpc.GrpcReadStream; +import tech.ydb.core.metrics.Meter; import tech.ydb.core.metrics.SessionPoolObserver; import tech.ydb.core.tracing.Span; -import tech.ydb.core.tracing.SpanFinalizer; import tech.ydb.core.utils.FutureTools; import tech.ydb.proto.query.YdbQuery; import tech.ydb.query.QuerySession; @@ -56,6 +56,8 @@ class SessionPool implements AutoCloseable { .build(); private final int minSize; + private final String poolName; + private final Meter meter; private final Clock clock; private final ScheduledExecutorService scheduler; private final WaitingQueue queue; @@ -63,8 +65,10 @@ class SessionPool implements AutoCloseable { private final StatsImpl stats = new StatsImpl(); SessionPool(Clock clock, QueryServiceRpc rpc, ScheduledExecutorService scheduler, int minSize, int maxSize, - Duration idleDuration) { + Duration idleDuration, String poolName) { this.minSize = minSize; + this.poolName = poolName; + this.meter = rpc.getMeter(); this.clock = clock; this.scheduler = scheduler; @@ -76,12 +80,23 @@ class SessionPool implements AutoCloseable { cleaner.periodMillis / 2, cleaner.periodMillis, TimeUnit.MILLISECONDS); - logger.info("init QuerySession pool, min size = {}, max size = {}, keep alive period = {}", + logger.info("init QuerySession pool '{}', min size = {}, max size = {}, keep alive period = {}", + poolName, minSize, maxSize, cleaner.periodMillis); - rpc.getMeter().registerSessionPool("default", new SessionPoolObserver() { + meter.registerSessionPool(poolName, new SessionPoolObserver() { + @Override + public int getMinSize() { + return minSize; + } + + @Override + public int getMaxSize() { + return queue.getTotalLimit(); + } + @Override public int getIdleCount() { return queue.getIdleCount(); @@ -91,11 +106,6 @@ public int getIdleCount() { public int getUsedCount() { return queue.getUsedCount(); } - - @Override - public int getPendingCount() { - return queue.getPendingCount(); - } }); } @@ -121,8 +131,9 @@ public CompletableFuture> acquire(Duration timeout) { // If next session is not ready - add timeout canceler if (!pollNext(future)) { + meter.incrementSessionPendingRequests(poolName); future.whenComplete(new Canceller(scheduler.schedule( - new Timeout(future), + new Timeout(future, meter, poolName), timeout.toMillis(), TimeUnit.MILLISECONDS) )); @@ -297,43 +308,24 @@ public CompletableFuture create() { try { Span createSpan = rpc.startSpan("ydb.CreateSession"); long startNanos = System.nanoTime(); - stats.requested.increment(); - return SessionImpl - .createSession(rpc, CREATE_SETTINGS, true, createSpan) + return Span.endOnResult(createSpan, SessionImpl.createSession(rpc, CREATE_SETTINGS, true, createSpan)) + .whenComplete((r, th) -> { + long elapsed = System.nanoTime() - startNanos; + meter.recordSessionCreateTime(poolName, elapsed); + meter.recordOperationDuration("CreateSession", elapsed); + if (r != null && !r.isSuccess()) { + meter.recordOperationFailed("CreateSession", r.getStatus()); + } + }) .thenCompose(r -> { if (!r.isSuccess()) { - SpanFinalizer.finishByStatus(createSpan, r.getStatus()); stats.failed.increment(); throw new UnexpectedResultException("create session problem", r.getStatus()); } PooledQuerySession session = new PooledQuerySession(rpc, r.getValue()); return session.start(); - }) - .whenComplete((result, th) -> { - if (th != null) { - Throwable error = FutureTools.unwrapCompletionException(th); - if (error instanceof UnexpectedResultException) { - SpanFinalizer.finishByStatus( - createSpan, - ((UnexpectedResultException) error).getStatus() - ); - } else { - SpanFinalizer.finishByError(createSpan, error); - } - return; - } - - SpanFinalizer.finishByStatus(createSpan, result.getStatus()); - }) - .whenComplete((status, th) -> { - long elapsed = System.nanoTime() - startNanos; - Status finalStatus = status != null - ? status.getStatus() : Status.of(StatusCode.CLIENT_INTERNAL_ERROR); - rpc.getMeter().recordOperation("ydb.CreateSession", elapsed, finalStatus); - rpc.getMeter().recordSessionCreateTime("default", elapsed); - }) - .thenApply(Result::getValue); + }).thenApply(Result::getValue); } finally { ctx.detach(previous); } @@ -491,15 +483,19 @@ static final class Timeout implements Runnable { ); private final CompletableFuture> f; + private final Meter meter; + private final String poolName; - Timeout(CompletableFuture> f) { + Timeout(CompletableFuture> f, Meter meter, String poolName) { this.f = f; + this.meter = meter; + this.poolName = poolName; } @Override public void run() { - if (f != null && !f.isDone()) { - f.complete(Result.fail(EXPIRE)); + if (f != null && !f.isDone() && f.complete(Result.fail(EXPIRE))) { + meter.incrementSessionTimeouts(poolName); } } } diff --git a/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java b/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java index a81fb2fca..d84d14063 100644 --- a/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java @@ -16,7 +16,6 @@ import tech.ydb.core.UnexpectedResultException; import tech.ydb.core.grpc.GrpcTransport; import tech.ydb.core.tracing.Span; -import tech.ydb.core.tracing.SpanFinalizer; import tech.ydb.core.tracing.Tracer; import tech.ydb.proto.ValueProtos; import tech.ydb.proto.query.YdbQuery; @@ -93,6 +92,9 @@ private YdbQuery.TransactionControl mapTxControl(YdbTable.TransactionControl tc) if (tc.getBeginTx().hasSnapshotReadOnly()) { return TxControl.txModeCtrl(TxMode.SNAPSHOT_RO, tc.getCommitTx()); } + if (tc.getBeginTx().hasSnapshotReadWrite()) { + return TxControl.txModeCtrl(TxMode.SNAPSHOT_RW, tc.getCommitTx()); + } if (tc.getBeginTx().hasStaleReadOnly()) { return TxControl.txModeCtrl(TxMode.STALE_RO, tc.getCommitTx()); } @@ -130,10 +132,10 @@ public CompletableFuture> executeDataQueryInternal( final List issues = new ArrayList<>(); final List results = new ArrayList<>(); Span span = querySession.startSpan("ydb.ExecuteQuery"); - final long startNanos = System.nanoTime(); + long startNanos = System.nanoTime(); QueryStream stream = querySession.new StreamImpl(querySession.createGrpcStream(query, tc, prms, qs, span), - span, startNanos, "ydb.ExecuteQuery") { + span, startNanos, "ExecuteQuery") { @Override void handleTxMeta(String txID) { txRef.set(txID); @@ -204,23 +206,23 @@ public CompletableFuture> beginTransaction(TxMode txMod } @Override - public CompletableFuture commitTransaction(String txId, CommitTxSettings settings) { + protected CompletableFuture commitTransactionInternal(String txId, CommitTxSettings settings) { Span span = querySession.startSpan("ydb.Commit"); CommitTransactionSettings querySettings = CommitTransactionSettings.newBuilder() .withTraceId(settings.getTraceId()) .withRequestTimeout(settings.getTimeoutDuration()) .build(); - return querySession.commitById(txId, querySettings, span).whenComplete(SpanFinalizer.whenComplete(span)); + return Span.endOnStatus(span, querySession.commitById(txId, querySettings, span)); } @Override - public CompletableFuture rollbackTransaction(String txId, RollbackTxSettings settings) { + protected CompletableFuture rollbackTransactionInternal(String txId, RollbackTxSettings settings) { Span span = querySession.startSpan("ydb.Rollback"); RollbackTransactionSettings querySettings = RollbackTransactionSettings.newBuilder() .withTraceId(settings.getTraceId()) .withRequestTimeout(settings.getTimeoutDuration()) .build(); - return querySession.rollbackById(txId, querySettings, span).whenComplete(SpanFinalizer.whenComplete(span)); + return Span.endOnStatus(span, querySession.rollbackById(txId, querySettings, span)); } private final class TracedTableTransaction implements TableTransaction { @@ -257,7 +259,7 @@ public CompletableFuture rollback(RollbackTxSettings settings) { if (txId == null) { return delegate.rollback(settings); } - return TableSession.this.rollbackTransaction(txId, settings); + return rollbackTransactionInternal(txId, settings); } @Override diff --git a/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java b/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java index b68eda3ec..303e69229 100644 --- a/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java +++ b/query/src/test/java/tech/ydb/query/impl/QueryTracingTest.java @@ -4,6 +4,16 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import javax.annotation.Nullable; import org.junit.After; import org.junit.AfterClass; @@ -17,7 +27,10 @@ import tech.ydb.common.transaction.TxMode; import tech.ydb.core.Result; import tech.ydb.core.Status; +import tech.ydb.core.StatusCode; +import tech.ydb.core.UnexpectedResultException; import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.tracing.Scope; import tech.ydb.core.tracing.Span; import tech.ydb.core.tracing.SpanKind; import tech.ydb.core.tracing.Tracer; @@ -25,6 +38,8 @@ import tech.ydb.query.QuerySession; import tech.ydb.query.QueryTransaction; import tech.ydb.query.result.QueryInfo; +import tech.ydb.query.tools.SessionRetryContext; +import tech.ydb.proto.StatusCodesProtos; import tech.ydb.table.Session; import tech.ydb.table.TableClient; import tech.ydb.table.query.DataQueryResult; @@ -39,6 +54,7 @@ public class QueryTracingTest { private static GrpcTransport transport; private static RecordingTracer tracer; + private static final GrpcTestInterceptor grpcInterceptor = new GrpcTestInterceptor(); private QueryClient queryClient; private TableClient tableClient; @@ -48,6 +64,7 @@ public static void initTransport() { tracer = new RecordingTracer(); transport = GrpcTransport.forEndpoint(YDB.endpoint(), YDB.database()) .withAuthProvider(new TokenAuthProvider(YDB.authToken())) + .addChannelInitializer(grpcInterceptor) .withTracer(tracer) .build(); } @@ -60,6 +77,7 @@ public static void closeTransport() { @Before public void initClient() { tracer.reset(); + grpcInterceptor.reset(); queryClient = QueryClient.newClient(transport).build(); tableClient = null; } @@ -74,13 +92,22 @@ public void closeClient() { } } + private static void awaitTracing() { + try { + tracer.awaitAllSpansClosed(Duration.ofSeconds(30)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + Assert.fail(e.toString()); + } + } + @Test public void createSessionSpanIsRecorded() { - try (QuerySession ignored = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { - // no-op + try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + Assert.assertNotNull(session.getId()); + awaitTracing(); + Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); } - - Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); } @Test @@ -89,6 +116,7 @@ public void executeQuerySpanIsRecorded() { session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); } + awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.ExecuteQuery")); } @@ -103,6 +131,7 @@ public void commitSpanIsRecordedInQueryTransaction() { commitResult.getStatus().expectSuccess(); } + awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Commit")); } @@ -117,6 +146,7 @@ public void rollbackSpanIsRecordedInQueryTransaction() { rollbackStatus.expectSuccess(); } + awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Rollback")); } @@ -132,6 +162,7 @@ public void createSessionAndExecuteQuerySpansAreRecordedInTableProxy() { result.getStatus().expectSuccess(); } + awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); Assert.assertEquals(1, tracer.countClosedSpan("ydb.ExecuteQuery")); } @@ -153,47 +184,406 @@ public void executeQuerySpansAreRecordedInTableProxyTransaction() { rollbackStatus.expectSuccess(); } + awaitTracing(); Assert.assertEquals(1, tracer.countClosedSpan("ydb.CreateSession")); Assert.assertEquals(2, tracer.countClosedSpan("ydb.ExecuteQuery")); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Commit")); Assert.assertEquals(1, tracer.countClosedSpan("ydb.Rollback")); } + @Test + public void querySpanIsChildOfApplicationSpan() { + try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + Span appParent = tracer.startSpan("app.parent", SpanKind.INTERNAL); + try (Scope ignored = appParent.makeCurrent()) { + session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); + } finally { + appParent.end(); + } + } + + awaitTracing(); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "app.parent")); + } + + @Test + public void retrySpanIsParentOfRpcSpans() { + grpcInterceptor.failExecuteQuery(StatusCodesProtos.StatusIds.StatusCode.BAD_SESSION, 2); + SessionRetryContext retryContext = SessionRetryContext.create(queryClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + Span appParent = tracer.startSpan("app.parent.retry", SpanKind.INTERNAL); + try (Scope ignored = appParent.makeCurrent()) { + Status status = retryContext.supplyStatus(session -> + session.createQuery("SELECT 1", TxMode.NONE).execute().thenApply(Result::getStatus)).join(); + status.expectSuccess(); + } finally { + appParent.end(); + } + + awaitTracing(); + Assert.assertEquals(3, tracer.countClosedSpan("ydb.Try")); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.retry")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); + // First Try has no backoff_ms (no prior sleep); subsequent tries capture the prior sleep duration + Assert.assertEquals(2, tracer.countClosedSpanWithAttribute("ydb.Try", "ydb.retry.backoff_ms")); + } + + @Test + public void retryContextRetriesOnCreateSessionFailures() { + grpcInterceptor.failCreateSession(StatusCodesProtos.StatusIds.StatusCode.ABORTED, 2); + SessionRetryContext retryContext = SessionRetryContext.create(queryClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + Span appParent = tracer.startSpan("app.parent.createSession.retry", SpanKind.INTERNAL); + try (Scope ignored = appParent.makeCurrent()) { + Status status = retryContext.supplyStatus(session -> + session.createQuery("SELECT 1", TxMode.NONE).execute().thenApply(Result::getStatus)).join(); + status.expectSuccess(); + } finally { + appParent.end(); + } + + awaitTracing(); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.createSession.retry")); + Assert.assertEquals(3, tracer.countClosedSpan("ydb.Try")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); + Assert.assertEquals(2, tracer.countClosedSpanWithAttribute("ydb.Try", "ydb.retry.backoff_ms")); + } + + @Test + public void retryContextRetriesOnCommitFailures() { + grpcInterceptor.failCommit(StatusCodesProtos.StatusIds.StatusCode.BAD_SESSION, 2); + SessionRetryContext retryContext = SessionRetryContext.create(queryClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + Span appParent = tracer.startSpan("app.parent.commit.retry", SpanKind.INTERNAL); + try (Scope ignored = appParent.makeCurrent()) { + Status status = retryContext.supplyStatus(session -> { + QueryTransaction tx = session.createNewTransaction(TxMode.SERIALIZABLE_RW); + return tx.createQuery("SELECT 1").execute() + .thenCompose(queryResult -> { + queryResult.getStatus().expectSuccess(); + return tx.commit().thenApply(Result::getStatus); + }); + } + ).join(); + status.expectSuccess(); + } finally { + appParent.end(); + } + + awaitTracing(); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.commit.retry")); + Assert.assertEquals(3, tracer.countClosedSpan("ydb.Try")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); + Assert.assertEquals(3, tracer.countClosedSpanWithParent("ydb.Commit", "ydb.Try")); + Assert.assertEquals(2, tracer.countClosedSpanWithAttribute("ydb.Try", "ydb.retry.backoff_ms")); + } + + @Test + public void tableProxyRetrySpanIsParentOfRpcSpans() { + AtomicInteger attempt = new AtomicInteger(); + tableClient = QueryClient.newTableClient(transport).build(); + tech.ydb.table.SessionRetryContext retryContext = tech.ydb.table.SessionRetryContext.create(tableClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + Span appParent = tracer.startSpan("app.parent.table.retry", SpanKind.INTERNAL); + try (Scope ignored = appParent.makeCurrent()) { + Status status = retryContext.supplyStatus(session -> { + if (attempt.getAndIncrement() == 0) { + return CompletableFuture.completedFuture(Status.of(StatusCode.OVERLOADED)); + } + TableTransaction tx = session.createNewTransaction(TxMode.SERIALIZABLE_RW); + Result queryResult = tx.executeDataQuery("SELECT 1", Params.empty()).join(); + queryResult.getStatus().expectSuccess(); + return tx.commit(); + }).join(); + status.expectSuccess(); + } finally { + appParent.end(); + } + + awaitTracing(); + Assert.assertEquals(2, tracer.countClosedSpan("ydb.Try")); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.RunWithRetry", "app.parent.table.retry")); + Assert.assertEquals(2, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.Commit", "ydb.Try")); + } + + @Test + public void nonRetryableExceptionClosesRetrySpan() { + SessionRetryContext retryContext = SessionRetryContext.create(queryClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + RuntimeException thrown; + try { + retryContext.supplyStatus(session -> { + throw new IllegalStateException("boom"); + }).join(); + throw new AssertionError("Exception expected"); + } catch (RuntimeException ex) { + thrown = ex; + } + + awaitTracing(); + Assert.assertNotNull(thrown.getCause()); + Assert.assertTrue(thrown.getCause() instanceof IllegalStateException); + Assert.assertEquals(1, + tracer.countClosedSpanWithErrorType("ydb.RunWithRetry", IllegalStateException.class.getName())); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); + Assert.assertEquals(1, tracer.countClosedSpan("ydb.Try")); + Assert.assertEquals(1, + tracer.countClosedSpanWithErrorType("ydb.Try", IllegalStateException.class.getName())); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.CreateSession", "ydb.Try")); + Assert.assertEquals(0, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); + } + + @Test + public void retryableUnexpectedResultExceptionRetriesAndSetsErrorType() { + AtomicInteger attempt = new AtomicInteger(); + SessionRetryContext retryContext = SessionRetryContext.create(queryClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + Status status = retryContext.supplyStatus(session -> { + if (attempt.getAndIncrement() == 0) { + throw new UnexpectedResultException("retryable", Status.of(StatusCode.OVERLOADED)); + } + return session.createQuery("SELECT 1", TxMode.NONE).execute().thenApply(Result::getStatus); + }).join(); + status.expectSuccess(); + + awaitTracing(); + Assert.assertEquals(1, tracer.countClosedSpan("ydb.RunWithRetry")); + Assert.assertEquals(2, tracer.countClosedSpanWithParent("ydb.Try", "ydb.RunWithRetry")); + Assert.assertEquals(2, tracer.countClosedSpan("ydb.Try")); + Assert.assertEquals(1, + tracer.countClosedSpanWithErrorType("ydb.Try", UnexpectedResultException.class.getName())); + Assert.assertEquals(1, tracer.countClosedSpanWithParent("ydb.ExecuteQuery", "ydb.Try")); + } + + @Test + public void contextCancelClosesRunWithRetrySpan() throws InterruptedException { + SessionRetryContext retryContext = SessionRetryContext.create(queryClient) + .maxRetries(5) + .backoffSlot(Duration.ofMillis(1)) + .fastBackoffSlot(Duration.ofMillis(1)) + .build(); + + CountDownLatch firstPartReceived = new CountDownLatch(1); + CountDownLatch unblockReader = new CountDownLatch(1); + + CompletableFuture future = retryContext.supplyStatus(session -> + session.createQuery("SELECT 1; SELECT 2;", TxMode.NONE).execute(part -> { + firstPartReceived.countDown(); + try { + unblockReader.await(30, TimeUnit.SECONDS); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + } + }).thenApply(Result::getStatus)); + + Assert.assertTrue(firstPartReceived.await(30, TimeUnit.SECONDS)); + future.cancel(false); + unblockReader.countDown(); + + awaitTracing(); + Assert.assertThrows(CancellationException.class, future::join); + Assert.assertTrue(future.isCancelled()); + Assert.assertEquals(1, + tracer.countClosedSpanWithErrorType("ydb.RunWithRetry", CancellationException.class.getName())); + } + private static final class RecordingTracer implements Tracer { - private final List spans = Collections.synchronizedList(new ArrayList()); + private final List spans = Collections.synchronizedList(new ArrayList<>()); + private final ThreadLocal currentSpan = new ThreadLocal<>(); + + private final Object spanLock = new Object(); + private int openSpans = 0; @Override public Span startSpan(String spanName, SpanKind spanKind) { - RecordingSpan span = new RecordingSpan(spanName, spanKind); + RecordingSpan span = new RecordingSpan(this, spanName, spanKind, currentSpan.get()); spans.add(span); return span; } + /** Called from {@link RecordingSpan} constructor: one span opened. */ + void recordSpanOpen() { + synchronized (spanLock) { + openSpans++; + } + } + + /** Called from {@link RecordingSpan#end()}: one span closed. */ + void recordSpanClose() { + synchronized (spanLock) { + openSpans--; + if (openSpans == 0) { + spanLock.notifyAll(); + } + } + } + void reset() { - spans.clear(); + synchronized (spanLock) { + spans.clear(); + openSpans = 0; + } + } + + /** Waits until every span that called {@link #recordSpanOpen} has ended ({@code openSpans == 0}). */ + void awaitAllSpansClosed(Duration timeout) throws InterruptedException { + long deadlineNanos = System.nanoTime() + timeout.toNanos(); + synchronized (spanLock) { + while (openSpans > 0) { + long remainingNanos = deadlineNanos - System.nanoTime(); + if (remainingNanos <= 0) { + Assert.fail("await timeout, openSpans=" + openSpans); + } + long waitMillis = TimeUnit.NANOSECONDS.toMillis(remainingNanos); + if (waitMillis == 0) { + waitMillis = 1; + } + spanLock.wait(waitMillis); + } + } } int countClosedSpan(String spanName) { int count = 0; synchronized (spans) { for (RecordingSpan span : spans) { - if (span.name.equals(spanName) && span.closed) { + if (span.spanName().equals(spanName) && span.isClosed()) { + count++; + } + } + } + return count; + } + + int countClosedSpanWithParent(String spanName, String parentSpanName) { + int count = 0; + synchronized (spans) { + for (RecordingSpan span : spans) { + if (span.isClosed() + && span.spanName().equals(spanName) + && span.parentSpan() != null + && span.parentSpan().spanName().equals(parentSpanName)) { + count++; + } + } + } + return count; + } + + int countClosedSpanWithErrorType(String spanName, String errorType) { + int count = 0; + synchronized (spans) { + for (RecordingSpan span : spans) { + Throwable err = span.throwableError(); + if (span.isClosed() + && span.spanName().equals(spanName) + && err != null + && err.getClass().getName().equals(errorType)) { count++; } } } return count; } + + int countClosedSpanWithAttribute(String spanName, String key) { + int count = 0; + synchronized (spans) { + for (RecordingSpan span : spans) { + if (span.isClosed() + && span.spanName().equals(spanName) + && span.hasLongAttribute(key)) { + count++; + } + } + } + return count; + } + + Scope makeSpanCurrent(RecordingSpan span) { + RecordingSpan previous = currentSpan.get(); + currentSpan.set(span); + return () -> { + if (previous == null) { + currentSpan.remove(); + } else { + currentSpan.set(previous); + } + }; + } } private static final class RecordingSpan implements Span { + private final RecordingTracer tracer; private final String name; private final SpanKind kind; + private final RecordingSpan parent; + private final ConcurrentMap longAttributes = new ConcurrentHashMap<>(); + private Throwable throwableError; private volatile boolean closed = false; + private final AtomicBoolean endedOnce = new AtomicBoolean(false); - RecordingSpan(String name, SpanKind kind) { + RecordingSpan(RecordingTracer tracer, String name, SpanKind kind, RecordingSpan parent) { + this.tracer = tracer; this.name = name; this.kind = kind; + this.parent = parent; + tracer.recordSpanOpen(); + } + + String spanName() { + return name; + } + + boolean isClosed() { + return closed; + } + + @Nullable + RecordingSpan parentSpan() { + return parent; + } + + @Nullable + Throwable throwableError() { + return throwableError; + } + + boolean hasLongAttribute(String key) { + return longAttributes.containsKey(key); } @Override @@ -202,19 +592,37 @@ public String getId() { } @Override - public void setAttribute(String key, String value) { - // not needed for this test + public boolean isValid() { + return true; } @Override - public void setAttribute(String key, long value) { - // not needed for this test + public Scope makeCurrent() { + return tracer.makeSpanCurrent(this); } + @Override + public Scope restoreContext() { + return parent != null ? parent.makeCurrent() : Span.NOOP.makeCurrent(); + } + + @Override + public void setStatus(@Nullable Status status, @Nullable Throwable error) { + this.throwableError = error; + } + + @Override + public void setAttribute(String key, long value) { + longAttributes.put(key, value); + } @Override public void end() { + if (!endedOnce.compareAndSet(false, true)) { + return; + } this.closed = true; + tracer.recordSpanClose(); } } } diff --git a/opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java similarity index 62% rename from opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java rename to query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java index dc4d5c736..1111488a4 100644 --- a/opentelemetry/src/test/java/tech/ydb/opentelemetry/OpenTelemetryMetricsIntegrationTest.java +++ b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java @@ -1,8 +1,9 @@ -package tech.ydb.opentelemetry; +package tech.ydb.query.opentelemetry; import java.io.IOException; import java.time.Duration; import java.util.Collection; +import java.util.concurrent.CompletableFuture; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.common.AttributeKey; @@ -22,13 +23,15 @@ import tech.ydb.auth.TokenAuthProvider; import tech.ydb.common.transaction.TxMode; +import tech.ydb.core.Result; import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.metrics.OpenTelemetryMeter; import tech.ydb.query.QueryClient; import tech.ydb.query.QuerySession; import tech.ydb.query.QueryTransaction; import tech.ydb.test.junit4.YdbHelperRule; -public class OpenTelemetryMetricsIntegrationTest { +public class OpenTelemetryQueryMetricsIntegrationTest { @ClassRule public static final YdbHelperRule YDB = new YdbHelperRule(); @@ -36,7 +39,9 @@ public class OpenTelemetryMetricsIntegrationTest { private static final AttributeKey DB_NAMESPACE = AttributeKey.stringKey("db.namespace"); private static final AttributeKey SERVER_ADDRESS = AttributeKey.stringKey("server.address"); private static final AttributeKey SERVER_PORT = AttributeKey.longKey("server.port"); - private static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("ydb.operation.name"); + private static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("operation.name"); + private static final AttributeKey POOL_NAME = AttributeKey.stringKey("ydb.query.session.pool.name"); + private static final AttributeKey SESSION_STATE = AttributeKey.stringKey("ydb.query.session.state"); private static InMemoryMetricReader metricReader; private static SdkMeterProvider meterProvider; @@ -87,11 +92,12 @@ public void executeQueryRecordsOperationDuration() { session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); } - MetricData metric = findMetric("db.client.operation.duration"); - Assert.assertNotNull("db.client.operation.duration metric not found", metric); + MetricData metric = findMetric("ydb.client.operation.duration"); + Assert.assertNotNull("ydb.client.operation.duration metric not found", metric); + Assert.assertEquals("s", metric.getUnit()); - HistogramPointData point = findHistogramPoint(metric, "ydb.ExecuteQuery"); - Assert.assertNotNull("No histogram point for ydb.ExecuteQuery", point); + HistogramPointData point = findHistogramPoint(metric, "ExecuteQuery"); + Assert.assertNotNull("No histogram point for ExecuteQuery", point); Assert.assertTrue("Duration must be > 0", point.getSum() > 0); Assert.assertEquals("ydb", point.getAttributes().get(DB_SYSTEM_NAME)); Assert.assertEquals(YDB.database(), point.getAttributes().get(DB_NAMESPACE)); @@ -113,13 +119,13 @@ public void commitAndRollbackRecordOperationDuration() { txRollback.rollback().join().expectSuccess(); } - MetricData metric = findMetric("db.client.operation.duration"); + MetricData metric = findMetric("ydb.client.operation.duration"); Assert.assertNotNull(metric); - Assert.assertNotNull("No histogram point for ydb.Commit", - findHistogramPoint(metric, "ydb.Commit")); - Assert.assertNotNull("No histogram point for ydb.Rollback", - findHistogramPoint(metric, "ydb.Rollback")); + Assert.assertNotNull("No histogram point for Commit", + findHistogramPoint(metric, "Commit")); + Assert.assertNotNull("No histogram point for Rollback", + findHistogramPoint(metric, "Rollback")); } @Test @@ -131,6 +137,7 @@ public void failedOperationRecordsFailedCounter() { MetricData metric = findMetric("ydb.client.operation.failed"); Assert.assertNotNull("ydb.client.operation.failed metric not found", metric); + Assert.assertEquals("{operation}", metric.getUnit()); Collection points = metric.getLongSumData().getPoints(); Assert.assertFalse("Failed counter must have at least one point", points.isEmpty()); @@ -140,16 +147,63 @@ public void failedOperationRecordsFailedCounter() { @Test public void sessionPoolMetricsAreReported() { - // создаём сессию чтобы пул оживился try (QuerySession session = queryClient.createSession(Duration.ofSeconds(5)).join().getValue()) { session.createQuery("SELECT 1", TxMode.NONE).execute().join().getStatus().expectSuccess(); } - MetricData metric = findMetric("ydb.query.session.count"); - Assert.assertNotNull("ydb.query.session.count metric not found", metric); + MetricData count = findMetric("ydb.query.session.count"); + Assert.assertNotNull("ydb.query.session.count metric not found", count); + Assert.assertEquals("{session}", count.getUnit()); + Assert.assertTrue("session.count must have idle/used buckets", + count.getLongGaugeData().getPoints().stream() + .map(p -> p.getAttributes().get(SESSION_STATE)) + .anyMatch("idle"::equals)); + + MetricData min = findMetric("ydb.query.session.min"); + Assert.assertNotNull("ydb.query.session.min metric not found", min); + Assert.assertEquals("{session}", min.getUnit()); + Assert.assertTrue("min must have a pool.name attribute", + min.getLongGaugeData().getPoints().stream() + .anyMatch(p -> p.getAttributes().get(POOL_NAME) != null)); + + MetricData max = findMetric("ydb.query.session.max"); + Assert.assertNotNull("ydb.query.session.max metric not found", max); + Assert.assertEquals("{session}", max.getUnit()); + + MetricData createTime = findMetric("ydb.query.session.create_time"); + Assert.assertNotNull("ydb.query.session.create_time metric not found", createTime); + Assert.assertEquals("s", createTime.getUnit()); + Assert.assertFalse("session.create_time must have at least one point", + createTime.getHistogramData().getPoints().isEmpty()); } - // --- helpers --- + @Test + public void sessionPendingAndTimeoutsMetricsAreCounters() { + try (QueryClient tinyClient = QueryClient.newClient(transport) + .sessionPoolMaxSize(1) + .sessionPoolName("tiny") + .build()) { + try (QuerySession s1 = tinyClient.createSession(Duration.ofSeconds(5)).join().getValue()) { + CompletableFuture> waiter = + tinyClient.createSession(Duration.ofMillis(100)); + waiter.join(); + } + } + + MetricData pending = findMetric("ydb.query.session.pending_requests"); + Assert.assertNotNull("ydb.query.session.pending_requests metric not found", pending); + Assert.assertEquals("{request}", pending.getUnit()); + long pendingTotal = pending.getLongSumData().getPoints().stream() + .mapToLong(LongPointData::getValue).sum(); + Assert.assertTrue("pending_requests must be > 0", pendingTotal > 0); + + MetricData timeouts = findMetric("ydb.query.session.timeouts"); + Assert.assertNotNull("ydb.query.session.timeouts metric not found", timeouts); + Assert.assertEquals("{timeout}", timeouts.getUnit()); + long timeoutTotal = timeouts.getLongSumData().getPoints().stream() + .mapToLong(LongPointData::getValue).sum(); + Assert.assertTrue("timeouts must be > 0", timeoutTotal > 0); + } private MetricData findMetric(String name) { Collection metrics = metricReader.collectAllMetrics(); @@ -172,7 +226,6 @@ private HistogramPointData findHistogramPoint(MetricData metric, String operatio } private static String extractHost(String endpoint) { - // endpoint вида grpc://host:port или host:port String stripped = endpoint.replaceFirst("grpcs?://", ""); int colon = stripped.lastIndexOf(':'); return colon >= 0 ? stripped.substring(0, colon) : stripped; diff --git a/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java b/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java index 199d3ae73..596466acb 100644 --- a/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java +++ b/table/src/main/java/tech/ydb/table/impl/SimpleTableClient.java @@ -6,6 +6,7 @@ import tech.ydb.core.Result; import tech.ydb.core.StatusCode; +import tech.ydb.core.tracing.Tracer; import tech.ydb.table.Session; import tech.ydb.table.SessionSupplier; import tech.ydb.table.rpc.TableRpc; @@ -38,6 +39,11 @@ public ScheduledExecutorService getScheduler() { return tableRpc.getScheduler(); } + @Override + public Tracer getTracer() { + return tableRpc.getTracer(); + } + public static Builder newClient(TableRpc rpc) { return new Builder(rpc); } From d8eae34bfb6489e75a2e931cfaac1034ce59a794 Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Thu, 7 May 2026 13:47:00 +0300 Subject: [PATCH 4/8] refactor: align metric attributes with the C# SDK reference table Operation metrics now carry only `database`, `endpoint`, `operation.name` (+ `status_code` for failed). Session metrics carry only `ydb.query.session.pool.name` (+ `ydb.query.session.state` for count). OpenTelemetryMeter.fromOpenTelemetry takes (database, endpoint) instead of (database, host, port). Drops `db.system.name` / `db.namespace` / `server.address` / `server.port` / `db.response.status_code` keys, replaces them with `database` / `endpoint` / `status_code`. Descriptions match the C# reference table verbatim. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ydb/core/metrics/OpenTelemetryMeter.java | 69 ++++++++----------- ...nTelemetryQueryMetricsIntegrationTest.java | 22 +++--- 2 files changed, 40 insertions(+), 51 deletions(-) diff --git a/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java b/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java index 916aeaf98..d047c7632 100644 --- a/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java +++ b/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java @@ -16,13 +16,15 @@ public final class OpenTelemetryMeter implements Meter { private static final String DEFAULT_SCOPE = "tech.ydb.sdk"; + private static final AttributeKey DATABASE = AttributeKey.stringKey("database"); + private static final AttributeKey ENDPOINT = AttributeKey.stringKey("endpoint"); private static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("operation.name"); - private static final AttributeKey STATUS_CODE = AttributeKey.stringKey("db.response.status_code"); + private static final AttributeKey STATUS_CODE = AttributeKey.stringKey("status_code"); private static final AttributeKey POOL_NAME = AttributeKey.stringKey("ydb.query.session.pool.name"); private static final AttributeKey SESSION_STATE = AttributeKey.stringKey("ydb.query.session.state"); private final io.opentelemetry.api.metrics.Meter meter; - private final Attributes baseAttributes; + private final Attributes operationBaseAttributes; private final DoubleHistogram operationDuration; private final LongCounter operationFailed; @@ -30,51 +32,48 @@ public final class OpenTelemetryMeter implements Meter { private final LongCounter sessionPendingRequests; private final LongCounter sessionTimeouts; - private OpenTelemetryMeter(io.opentelemetry.api.metrics.Meter meter, - String database, String host, int port) { + private OpenTelemetryMeter(io.opentelemetry.api.metrics.Meter meter, String database, String endpoint) { this.meter = Objects.requireNonNull(meter, "meter is null"); - this.baseAttributes = Attributes.of( - AttributeKey.stringKey("db.system.name"), "ydb", - AttributeKey.stringKey("db.namespace"), database, - AttributeKey.stringKey("server.address"), host, - AttributeKey.longKey("server.port"), (long) port + this.operationBaseAttributes = Attributes.of( + DATABASE, database, + ENDPOINT, endpoint ); this.operationDuration = meter.histogramBuilder("ydb.client.operation.duration") .setUnit("s") - .setDescription("Duration of a single client operation attempt (ExecuteQuery, Commit, Rollback).") + .setDescription("Latency of each actual ExecuteQuery / Commit / Rollback attempt.") .build(); this.operationFailed = meter.counterBuilder("ydb.client.operation.failed") .setUnit("{operation}") - .setDescription("Number of failed client operation attempts.") + .setDescription("Unsuccessful operation attempts.") .build(); this.sessionCreateTime = meter.histogramBuilder("ydb.query.session.create_time") .setUnit("s") - .setDescription("Time spent creating a new session.") + .setDescription("Session creation cost (CreateSession + first AttachStream message).") .build(); this.sessionPendingRequests = meter.counterBuilder("ydb.query.session.pending_requests") .setUnit("{request}") - .setDescription("Number of session-acquire requests that had to wait for a free session.") + .setDescription("Increments when a caller starts waiting; use rate for wait pressure.") .build(); this.sessionTimeouts = meter.counterBuilder("ydb.query.session.timeouts") .setUnit("{timeout}") - .setDescription("Number of session-acquire timeouts.") + .setDescription("Session acquisition timeouts.") .build(); } public static OpenTelemetryMeter fromOpenTelemetry(OpenTelemetry openTelemetry, - String database, String host, int port) { + String database, String endpoint) { Objects.requireNonNull(openTelemetry, "openTelemetry is null"); - return new OpenTelemetryMeter(openTelemetry.getMeter(DEFAULT_SCOPE), database, host, port); + return new OpenTelemetryMeter(openTelemetry.getMeter(DEFAULT_SCOPE), database, endpoint); } - public static OpenTelemetryMeter createGlobal(String database, String host, int port) { - return fromOpenTelemetry(GlobalOpenTelemetry.get(), database, host, port); + public static OpenTelemetryMeter createGlobal(String database, String endpoint) { + return fromOpenTelemetry(GlobalOpenTelemetry.get(), database, endpoint); } @Override @@ -87,7 +86,7 @@ public void recordOperationFailed(String operationName, Status status) { if (status == null || status.isSuccess()) { return; } - Attributes attrs = baseAttributes.toBuilder() + Attributes attrs = operationBaseAttributes.toBuilder() .put(OPERATION_NAME, operationName) .put(STATUS_CODE, status.getCode().toString()) .build(); @@ -96,22 +95,14 @@ public void recordOperationFailed(String operationName, Status status) { @Override public void registerSessionPool(String poolName, SessionPoolObserver observer) { - Attributes idle = baseAttributes.toBuilder() - .put(POOL_NAME, poolName) - .put(SESSION_STATE, "idle") - .build(); - Attributes used = baseAttributes.toBuilder() - .put(POOL_NAME, poolName) - .put(SESSION_STATE, "used") - .build(); - Attributes pool = baseAttributes.toBuilder() - .put(POOL_NAME, poolName) - .build(); + Attributes idle = Attributes.of(POOL_NAME, poolName, SESSION_STATE, "idle"); + Attributes used = Attributes.of(POOL_NAME, poolName, SESSION_STATE, "used"); + Attributes pool = Attributes.of(POOL_NAME, poolName); meter.gaugeBuilder("ydb.query.session.count") .ofLongs() .setUnit("{session}") - .setDescription("Current number of sessions in the pool by state.") + .setDescription("Current pool session counts.") .buildWithCallback(measurement -> { measurement.record(observer.getIdleCount(), idle); measurement.record(observer.getUsedCount(), used); @@ -120,37 +111,37 @@ public void registerSessionPool(String poolName, SessionPoolObserver observer) { meter.gaugeBuilder("ydb.query.session.min") .ofLongs() .setUnit("{session}") - .setDescription("Configured minimum size of the session pool.") + .setDescription("Configured MinPoolSize.") .buildWithCallback(measurement -> measurement.record(observer.getMinSize(), pool)); meter.gaugeBuilder("ydb.query.session.max") .ofLongs() .setUnit("{session}") - .setDescription("Configured maximum size of the session pool.") + .setDescription("Configured MaxPoolSize.") .buildWithCallback(measurement -> measurement.record(observer.getMaxSize(), pool)); } @Override public void recordSessionCreateTime(String poolName, long durationNanos) { - sessionCreateTime.record(toSeconds(durationNanos), withPool(poolName)); + sessionCreateTime.record(toSeconds(durationNanos), poolAttributes(poolName)); } @Override public void incrementSessionPendingRequests(String poolName) { - sessionPendingRequests.add(1L, withPool(poolName)); + sessionPendingRequests.add(1L, poolAttributes(poolName)); } @Override public void incrementSessionTimeouts(String poolName) { - sessionTimeouts.add(1L, withPool(poolName)); + sessionTimeouts.add(1L, poolAttributes(poolName)); } private Attributes withOperation(String operationName) { - return baseAttributes.toBuilder().put(OPERATION_NAME, operationName).build(); + return operationBaseAttributes.toBuilder().put(OPERATION_NAME, operationName).build(); } - private Attributes withPool(String poolName) { - return baseAttributes.toBuilder().put(POOL_NAME, poolName).build(); + private static Attributes poolAttributes(String poolName) { + return Attributes.of(POOL_NAME, poolName); } private static double toSeconds(long durationNanos) { diff --git a/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java index 1111488a4..f75c366a1 100644 --- a/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java +++ b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java @@ -35,11 +35,10 @@ public class OpenTelemetryQueryMetricsIntegrationTest { @ClassRule public static final YdbHelperRule YDB = new YdbHelperRule(); - private static final AttributeKey DB_SYSTEM_NAME = AttributeKey.stringKey("db.system.name"); - private static final AttributeKey DB_NAMESPACE = AttributeKey.stringKey("db.namespace"); - private static final AttributeKey SERVER_ADDRESS = AttributeKey.stringKey("server.address"); - private static final AttributeKey SERVER_PORT = AttributeKey.longKey("server.port"); + private static final AttributeKey DATABASE = AttributeKey.stringKey("database"); + private static final AttributeKey ENDPOINT = AttributeKey.stringKey("endpoint"); private static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("operation.name"); + private static final AttributeKey STATUS_CODE = AttributeKey.stringKey("status_code"); private static final AttributeKey POOL_NAME = AttributeKey.stringKey("ydb.query.session.pool.name"); private static final AttributeKey SESSION_STATE = AttributeKey.stringKey("ydb.query.session.state"); @@ -60,12 +59,9 @@ public static void initTransport() { .setMeterProvider(meterProvider) .build(); - String host = extractHost(YDB.endpoint()); - int port = extractPort(YDB.endpoint()); - transport = GrpcTransport.forEndpoint(YDB.endpoint(), YDB.database()) .withAuthProvider(new TokenAuthProvider(YDB.authToken())) - .withMeter(OpenTelemetryMeter.fromOpenTelemetry(openTelemetry, YDB.database(), host, port)) + .withMeter(OpenTelemetryMeter.fromOpenTelemetry(openTelemetry, YDB.database(), YDB.endpoint())) .build(); } @@ -99,10 +95,8 @@ public void executeQueryRecordsOperationDuration() { HistogramPointData point = findHistogramPoint(metric, "ExecuteQuery"); Assert.assertNotNull("No histogram point for ExecuteQuery", point); Assert.assertTrue("Duration must be > 0", point.getSum() > 0); - Assert.assertEquals("ydb", point.getAttributes().get(DB_SYSTEM_NAME)); - Assert.assertEquals(YDB.database(), point.getAttributes().get(DB_NAMESPACE)); - Assert.assertNotNull(point.getAttributes().get(SERVER_ADDRESS)); - Assert.assertNotNull(point.getAttributes().get(SERVER_PORT)); + Assert.assertEquals(YDB.database(), point.getAttributes().get(DATABASE)); + Assert.assertEquals(YDB.endpoint(), point.getAttributes().get(ENDPOINT)); } @Test @@ -143,6 +137,10 @@ public void failedOperationRecordsFailedCounter() { Assert.assertFalse("Failed counter must have at least one point", points.isEmpty()); long total = points.stream().mapToLong(LongPointData::getValue).sum(); Assert.assertTrue("Failed counter must be > 0", total > 0); + Assert.assertTrue("Failed counter must carry status_code attribute", + points.stream().anyMatch(p -> p.getAttributes().get(STATUS_CODE) != null)); + Assert.assertTrue("Failed counter must carry database attribute", + points.stream().anyMatch(p -> YDB.database().equals(p.getAttributes().get(DATABASE)))); } @Test From 63be027641d7394a805fb4cb6734c4fd7deda5d2 Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Thu, 7 May 2026 13:59:50 +0300 Subject: [PATCH 5/8] refactor: restore descriptive metric descriptions Roll back the C#-style short descriptions to the original verbose English ones. The C# table was a reference for attribute layout, not for the description strings themselves. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ydb/core/metrics/OpenTelemetryMeter.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java b/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java index d047c7632..67bb7017c 100644 --- a/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java +++ b/core/src/main/java/tech/ydb/core/metrics/OpenTelemetryMeter.java @@ -42,27 +42,27 @@ private OpenTelemetryMeter(io.opentelemetry.api.metrics.Meter meter, String data this.operationDuration = meter.histogramBuilder("ydb.client.operation.duration") .setUnit("s") - .setDescription("Latency of each actual ExecuteQuery / Commit / Rollback attempt.") + .setDescription("Duration of a single client operation attempt (ExecuteQuery, Commit, Rollback).") .build(); this.operationFailed = meter.counterBuilder("ydb.client.operation.failed") .setUnit("{operation}") - .setDescription("Unsuccessful operation attempts.") + .setDescription("Number of failed client operation attempts.") .build(); this.sessionCreateTime = meter.histogramBuilder("ydb.query.session.create_time") .setUnit("s") - .setDescription("Session creation cost (CreateSession + first AttachStream message).") + .setDescription("Time spent creating a new session.") .build(); this.sessionPendingRequests = meter.counterBuilder("ydb.query.session.pending_requests") .setUnit("{request}") - .setDescription("Increments when a caller starts waiting; use rate for wait pressure.") + .setDescription("Number of session-acquire requests that had to wait for a free session.") .build(); this.sessionTimeouts = meter.counterBuilder("ydb.query.session.timeouts") .setUnit("{timeout}") - .setDescription("Session acquisition timeouts.") + .setDescription("Number of session-acquire timeouts.") .build(); } @@ -102,7 +102,7 @@ public void registerSessionPool(String poolName, SessionPoolObserver observer) { meter.gaugeBuilder("ydb.query.session.count") .ofLongs() .setUnit("{session}") - .setDescription("Current pool session counts.") + .setDescription("Current number of sessions in the pool by state.") .buildWithCallback(measurement -> { measurement.record(observer.getIdleCount(), idle); measurement.record(observer.getUsedCount(), used); @@ -111,13 +111,13 @@ public void registerSessionPool(String poolName, SessionPoolObserver observer) { meter.gaugeBuilder("ydb.query.session.min") .ofLongs() .setUnit("{session}") - .setDescription("Configured MinPoolSize.") + .setDescription("Configured minimum size of the session pool.") .buildWithCallback(measurement -> measurement.record(observer.getMinSize(), pool)); meter.gaugeBuilder("ydb.query.session.max") .ofLongs() .setUnit("{session}") - .setDescription("Configured MaxPoolSize.") + .setDescription("Configured maximum size of the session pool.") .buildWithCallback(measurement -> measurement.record(observer.getMaxSize(), pool)); } From 189daf7bfddc357bd3201a40016168e80e2db51f Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Thu, 7 May 2026 17:19:54 +0300 Subject: [PATCH 6/8] refactor: fold commit/rollback metric recording into existing whenComplete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the recordCommitMetrics / recordRollbackMetrics helpers — they wrapped the future in an extra .whenComplete just to record duration/failed. Master already terminates both chains with a .whenComplete handling currentStatus, so the metric calls fit there directly. For early-return paths (transactionId == null) the future is already completed, so duration is recorded synchronously without a wrapper. SessionPool.Handler#create keeps its dedicated .whenComplete: master has no existing one for the createSession stage, and reusing Span.endOnResult's internal one would mean duplicating its span-finalization logic. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../java/tech/ydb/query/impl/SessionImpl.java | 105 +++++++----------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java index 3b62210ea..0fb792bf3 100644 --- a/query/src/main/java/tech/ydb/query/impl/SessionImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/SessionImpl.java @@ -535,9 +535,8 @@ public CompletableFuture> commit(CommitTransactionSettings set if (transactionId == null) { Issue issue = Issue.of("Transaction is not started", Issue.Severity.WARNING); Result res = Result.success(new QueryInfo(null), Status.of(StatusCode.SUCCESS, issue)); - return recordCommitMetrics( - Span.endOnResult(span, CompletableFuture.completedFuture(res)), - startNanos); + rpc.getMeter().recordOperationDuration("Commit", System.nanoTime() - startNanos); + return Span.endOnResult(span, CompletableFuture.completedFuture(res)); } YdbQuery.CommitTransactionRequest request = YdbQuery.CommitTransactionRequest.newBuilder() @@ -545,36 +544,27 @@ public CompletableFuture> commit(CommitTransactionSettings set .setTxId(transactionId) .build(); - return recordCommitMetrics( - Span.endOnResult(span, rpc.commitTransaction(request, makeOptions(settings, span).build())) - .thenApply(res -> { - Status status = res.getStatus(); - currentStatusFuture.complete(status); - updateSessionState(status); - if (!txId.compareAndSet(transactionId, null)) { - logger.warn("{} lost commit response for transaction {}", - SessionImpl.this, transactionId); - } - // TODO: CommitTransactionResponse must contain exec_stats - return res.map(resp -> new QueryInfo(null)); - }).whenComplete(((status, th) -> { - if (th != null) { - currentStatusFuture.completeExceptionally( - new RuntimeException("Transaction commit failed with exception", th)); - } - })), - startNanos); - } - - private CompletableFuture> recordCommitMetrics( - CompletableFuture> future, long startNanos) { - return future.whenComplete((res, th) -> { - long elapsed = System.nanoTime() - startNanos; - rpc.getMeter().recordOperationDuration("Commit", elapsed); - if (res != null && !res.getStatus().isSuccess()) { - rpc.getMeter().recordOperationFailed("Commit", res.getStatus()); - } - }); + return Span.endOnResult(span, rpc.commitTransaction(request, makeOptions(settings, span).build())) + .thenApply(res -> { + Status status = res.getStatus(); + currentStatusFuture.complete(status); + updateSessionState(status); + if (!txId.compareAndSet(transactionId, null)) { + logger.warn("{} lost commit response for transaction {}", + SessionImpl.this, transactionId); + } + // TODO: CommitTransactionResponse must contain exec_stats + return res.map(resp -> new QueryInfo(null)); + }).whenComplete(((res, th) -> { + rpc.getMeter().recordOperationDuration("Commit", System.nanoTime() - startNanos); + if (res != null && !res.getStatus().isSuccess()) { + rpc.getMeter().recordOperationFailed("Commit", res.getStatus()); + } + if (th != null) { + currentStatusFuture.completeExceptionally( + new RuntimeException("Transaction commit failed with exception", th)); + } + })); } @Override @@ -587,41 +577,32 @@ public CompletableFuture rollback(RollbackTransactionSettings settings) if (transactionId == null) { Issue issue = Issue.of("Transaction is not started", Issue.Severity.WARNING); Status status = Status.of(StatusCode.SUCCESS, issue); - return recordRollbackMetrics( - Span.endOnStatus(span, CompletableFuture.completedFuture(status)), - startNanos); + rpc.getMeter().recordOperationDuration("Rollback", System.nanoTime() - startNanos); + return Span.endOnStatus(span, CompletableFuture.completedFuture(status)); } YdbQuery.RollbackTransactionRequest request = YdbQuery.RollbackTransactionRequest.newBuilder() .setSessionId(sessionId) .setTxId(transactionId) .build(); - return recordRollbackMetrics( - Span.endOnResult(span, rpc.rollbackTransaction(request, makeOptions(settings, span).build())) - .thenApply(result -> { - updateSessionState(result.getStatus()); - if (!txId.compareAndSet(transactionId, null)) { - logger.warn("{} lost rollback response for transaction {}", SessionImpl.this, - transactionId); - } - return result.getStatus(); - }) - .whenComplete((status, th) -> { - currentStatusFuture.complete(Status - .of(StatusCode.ABORTED) - .withIssues(Issue.of("Transaction was rolled back", Issue.Severity.ERROR))); - }), - startNanos); - } - - private CompletableFuture recordRollbackMetrics(CompletableFuture future, long startNanos) { - return future.whenComplete((status, th) -> { - long elapsed = System.nanoTime() - startNanos; - rpc.getMeter().recordOperationDuration("Rollback", elapsed); - if (status != null && !status.isSuccess()) { - rpc.getMeter().recordOperationFailed("Rollback", status); - } - }); + return Span.endOnResult(span, rpc.rollbackTransaction(request, makeOptions(settings, span).build())) + .thenApply(result -> { + updateSessionState(result.getStatus()); + if (!txId.compareAndSet(transactionId, null)) { + logger.warn("{} lost rollback response for transaction {}", SessionImpl.this, + transactionId); + } + return result.getStatus(); + }) + .whenComplete((status, th) -> { + currentStatusFuture.complete(Status + .of(StatusCode.ABORTED) + .withIssues(Issue.of("Transaction was rolled back", Issue.Severity.ERROR))); + rpc.getMeter().recordOperationDuration("Rollback", System.nanoTime() - startNanos); + if (status != null && !status.isSuccess()) { + rpc.getMeter().recordOperationFailed("Rollback", status); + } + }); } } } From df6e0f7d6fcdc125636febea831575aa5f56daaf Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Thu, 7 May 2026 17:53:13 +0300 Subject: [PATCH 7/8] refactor: move Meter wiring from GrpcTransport to QueryClient.Builder --- .../main/java/tech/ydb/core/grpc/GrpcTransport.java | 6 ------ .../tech/ydb/core/grpc/GrpcTransportBuilder.java | 12 ------------ .../java/tech/ydb/core/impl/YdbTransportImpl.java | 8 -------- query/src/main/java/tech/ydb/query/QueryClient.java | 3 +++ .../java/tech/ydb/query/impl/QueryClientImpl.java | 12 +++++++++++- .../java/tech/ydb/query/impl/QueryServiceRpc.java | 7 ++++++- .../java/tech/ydb/query/impl/TableClientImpl.java | 6 ++++++ .../OpenTelemetryQueryMetricsIntegrationTest.java | 7 +++++-- table/src/main/java/tech/ydb/table/TableClient.java | 4 ++++ 9 files changed, 35 insertions(+), 30 deletions(-) diff --git a/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java b/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java index 328786874..96816601f 100644 --- a/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java +++ b/core/src/main/java/tech/ydb/core/grpc/GrpcTransport.java @@ -12,8 +12,6 @@ import io.grpc.MethodDescriptor; import tech.ydb.core.Result; -import tech.ydb.core.metrics.Meter; -import tech.ydb.core.metrics.NoopMeter; import tech.ydb.core.tracing.NoopTracer; import tech.ydb.core.tracing.Tracer; import tech.ydb.core.utils.URITools; @@ -48,10 +46,6 @@ default Tracer getTracer() { return NoopTracer.getInstance(); } - default Meter getMeter() { - return NoopMeter.INSTANCE; - } - @Override void close(); diff --git a/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java b/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java index a77cab841..534cf08be 100644 --- a/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java +++ b/core/src/main/java/tech/ydb/core/grpc/GrpcTransportBuilder.java @@ -26,8 +26,6 @@ import tech.ydb.core.impl.auth.GrpcAuthRpc; import tech.ydb.core.impl.pool.ChannelFactoryLoader; import tech.ydb.core.impl.pool.ManagedChannelFactory; -import tech.ydb.core.metrics.Meter; -import tech.ydb.core.metrics.NoopMeter; import tech.ydb.core.tracing.NoopTracer; import tech.ydb.core.tracing.Tracer; import tech.ydb.core.utils.Version; @@ -89,7 +87,6 @@ public enum InitMode { private GrpcCompression compression = GrpcCompression.NO_COMPRESSION; private InitMode initMode = InitMode.SYNC; private Tracer tracer = NoopTracer.getInstance(); - private Meter meter = NoopMeter.getInstance(); /** * can cause leaks https://github.com/grpc/grpc-java/issues/9340 @@ -198,10 +195,6 @@ public Tracer getTracer() { return tracer; } - public Meter getMeter() { - return meter; - } - public ManagedChannelFactory getManagedChannelFactory() { if (channelFactoryBuilder == null) { channelFactoryBuilder = ChannelFactoryLoader.load(); @@ -430,11 +423,6 @@ public GrpcTransportBuilder withTracer(Tracer tracer) { return this; } - public GrpcTransportBuilder withMeter(Meter meter) { - this.meter = Objects.requireNonNull(meter, "meter is null"); - return this; - } - /** * use {@link GrpcTransportBuilder#withGrpcRetry(boolean) } instead * @return this diff --git a/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java b/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java index 768634801..52d4ba018 100644 --- a/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java +++ b/core/src/main/java/tech/ydb/core/impl/YdbTransportImpl.java @@ -21,7 +21,6 @@ import tech.ydb.core.impl.pool.GrpcChannel; import tech.ydb.core.impl.pool.GrpcChannelPool; import tech.ydb.core.impl.pool.ManagedChannelFactory; -import tech.ydb.core.metrics.Meter; import tech.ydb.core.tracing.Tracer; /** @@ -38,7 +37,6 @@ public class YdbTransportImpl extends BaseGrpcTransport { private final GrpcChannelPool channelPool; private final YdbDiscovery discovery; private final Tracer tracer; - private final Meter meter; public YdbTransportImpl(GrpcTransportBuilder builder) { super(builder); @@ -47,7 +45,6 @@ public YdbTransportImpl(GrpcTransportBuilder builder) { this.database = Strings.nullToEmpty(builder.getDatabase()); this.tracer = builder.getTracer(); - this.meter = builder.getMeter(); logger.info("Create YDB transport with endpoint {} and {}", serverEndpoint, balancingSettings); @@ -127,11 +124,6 @@ public Tracer getTracer() { return tracer; } - @Override - public Meter getMeter() { - return meter; - } - @Override public AuthCallOptions getAuthCallOptions() { return callOptions; diff --git a/query/src/main/java/tech/ydb/query/QueryClient.java b/query/src/main/java/tech/ydb/query/QueryClient.java index 7a2177aa2..8cdeec529 100644 --- a/query/src/main/java/tech/ydb/query/QueryClient.java +++ b/query/src/main/java/tech/ydb/query/QueryClient.java @@ -8,6 +8,7 @@ import tech.ydb.core.Result; import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.metrics.Meter; import tech.ydb.core.tracing.NoopTracer; import tech.ydb.core.tracing.Tracer; import tech.ydb.query.impl.QueryClientImpl; @@ -57,6 +58,8 @@ interface Builder { Builder sessionPoolName(String poolName); + Builder withMeter(Meter meter); + QueryClient build(); } } diff --git a/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java b/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java index 1c266e0ef..b380d8bb1 100644 --- a/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/QueryClientImpl.java @@ -9,6 +9,8 @@ import tech.ydb.core.Result; import tech.ydb.core.grpc.GrpcTransport; +import tech.ydb.core.metrics.Meter; +import tech.ydb.core.metrics.NoopMeter; import tech.ydb.core.tracing.Tracer; import tech.ydb.query.QueryClient; import tech.ydb.query.QuerySession; @@ -28,7 +30,7 @@ public QueryClientImpl(Builder builder) { ? builder.sessionPoolName : builder.transport.getDatabase(); this.pool = new SessionPool( Clock.systemUTC(), - new QueryServiceRpc(builder.transport), + new QueryServiceRpc(builder.transport, builder.meter), builder.transport.getScheduler(), builder.sessionPoolMinSize, builder.sessionPoolMaxSize, @@ -81,6 +83,7 @@ public static class Builder implements QueryClient.Builder { private int sessionPoolMaxSize = 50; private Duration sessionPoolIdleDuration = Duration.ofMinutes(5); private String sessionPoolName = null; + private Meter meter = NoopMeter.INSTANCE; Builder(GrpcTransport transport) { Preconditions.checkArgument(transport != null, "transport is null"); @@ -138,6 +141,13 @@ public Builder sessionPoolName(String poolName) { return this; } + @Override + public Builder withMeter(Meter meter) { + Preconditions.checkArgument(meter != null, "meter is null"); + this.meter = meter; + return this; + } + @Override public QueryClientImpl build() { return new QueryClientImpl(this); diff --git a/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java b/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java index 6038c447a..c063e637e 100644 --- a/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java +++ b/query/src/main/java/tech/ydb/query/impl/QueryServiceRpc.java @@ -7,6 +7,7 @@ import tech.ydb.core.grpc.GrpcRequestSettings; import tech.ydb.core.grpc.GrpcTransport; import tech.ydb.core.metrics.Meter; +import tech.ydb.core.metrics.NoopMeter; import tech.ydb.core.operation.StatusExtractor; import tech.ydb.core.tracing.Span; import tech.ydb.core.tracing.SpanKind; @@ -55,9 +56,13 @@ class QueryServiceRpc { private final Meter meter; QueryServiceRpc(GrpcTransport transport) { + this(transport, NoopMeter.INSTANCE); + } + + QueryServiceRpc(GrpcTransport transport, Meter meter) { this.transport = transport; this.trace = transport.getTracer(); - this.meter = transport.getMeter(); + this.meter = meter; } Span startSpan(String spanName) { diff --git a/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java b/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java index d84d14063..4b932c6e5 100644 --- a/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java +++ b/query/src/main/java/tech/ydb/query/impl/TableClientImpl.java @@ -326,6 +326,12 @@ public Builder sessionMaxIdleTime(Duration duration) { return this; } + @Override + public Builder withMeter(tech.ydb.core.metrics.Meter meter) { + query.withMeter(meter); + return this; + } + @Override public TableClientImpl build() { return new TableClientImpl(this); diff --git a/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java index f75c366a1..302e5df25 100644 --- a/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java +++ b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java @@ -44,6 +44,7 @@ public class OpenTelemetryQueryMetricsIntegrationTest { private static InMemoryMetricReader metricReader; private static SdkMeterProvider meterProvider; + private static OpenTelemetryMeter ydbMeter; private static GrpcTransport transport; private QueryClient queryClient; @@ -59,9 +60,10 @@ public static void initTransport() { .setMeterProvider(meterProvider) .build(); + ydbMeter = OpenTelemetryMeter.fromOpenTelemetry(openTelemetry, YDB.database(), YDB.endpoint()); + transport = GrpcTransport.forEndpoint(YDB.endpoint(), YDB.database()) .withAuthProvider(new TokenAuthProvider(YDB.authToken())) - .withMeter(OpenTelemetryMeter.fromOpenTelemetry(openTelemetry, YDB.database(), YDB.endpoint())) .build(); } @@ -74,7 +76,7 @@ public static void closeTransport() throws IOException { @Before public void initClient() { - queryClient = QueryClient.newClient(transport).build(); + queryClient = QueryClient.newClient(transport).withMeter(ydbMeter).build(); } @After @@ -178,6 +180,7 @@ public void sessionPoolMetricsAreReported() { @Test public void sessionPendingAndTimeoutsMetricsAreCounters() { try (QueryClient tinyClient = QueryClient.newClient(transport) + .withMeter(ydbMeter) .sessionPoolMaxSize(1) .sessionPoolName("tiny") .build()) { diff --git a/table/src/main/java/tech/ydb/table/TableClient.java b/table/src/main/java/tech/ydb/table/TableClient.java index 5dcf73966..6b29d3735 100644 --- a/table/src/main/java/tech/ydb/table/TableClient.java +++ b/table/src/main/java/tech/ydb/table/TableClient.java @@ -47,6 +47,10 @@ interface Builder { Builder sessionMaxIdleTime(Duration duration); + default Builder withMeter(tech.ydb.core.metrics.Meter meter) { + return this; + } + TableClient build(); } } From 7e81c5a4b8052a3ac2f7ff614ff490e3b04331a1 Mon Sep 17 00:00:00 2001 From: KirillKurdyukov Date: Thu, 7 May 2026 18:23:02 +0300 Subject: [PATCH 8/8] test: harden sessionPendingAndTimeoutsMetricsAreCounters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous version called .join() on the second createSession future and discarded the result, then asserted that the timeouts counter is non-zero. If the waiter ever completed via the queue path (e.g., a session became available before the 100ms timeout) the test would silently fail later with a misleading "ydb.query.session.timeouts metric not found" — there'd be no timeout to record. Now we explicitly assert that the waiter completed with CLIENT_DEADLINE_EXPIRED before checking the counter, give it 500ms (still short, but well above scheduler jitter), and read both pending and timeouts metrics from a single metricReader collection so the assertion order doesn't matter. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...nTelemetryQueryMetricsIntegrationTest.java | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java index 302e5df25..ba2bd5199 100644 --- a/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java +++ b/query/src/test/java/tech/ydb/query/opentelemetry/OpenTelemetryQueryMetricsIntegrationTest.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.time.Duration; import java.util.Collection; -import java.util.concurrent.CompletableFuture; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.common.AttributeKey; @@ -24,6 +23,7 @@ import tech.ydb.auth.TokenAuthProvider; import tech.ydb.common.transaction.TxMode; import tech.ydb.core.Result; +import tech.ydb.core.StatusCode; import tech.ydb.core.grpc.GrpcTransport; import tech.ydb.core.metrics.OpenTelemetryMeter; import tech.ydb.query.QueryClient; @@ -185,20 +185,28 @@ public void sessionPendingAndTimeoutsMetricsAreCounters() { .sessionPoolName("tiny") .build()) { try (QuerySession s1 = tinyClient.createSession(Duration.ofSeconds(5)).join().getValue()) { - CompletableFuture> waiter = - tinyClient.createSession(Duration.ofMillis(100)); - waiter.join(); + Result result = tinyClient.createSession(Duration.ofMillis(500)).join(); + Assert.assertFalse( + "waiter must time out (sessionPoolMaxSize=1 with one session held), but got " + result, + result.isSuccess()); + Assert.assertEquals( + "waiter must complete with CLIENT_DEADLINE_EXPIRED", + StatusCode.CLIENT_DEADLINE_EXPIRED, result.getStatus().getCode()); } } - MetricData pending = findMetric("ydb.query.session.pending_requests"); + // Metric reader observes counter increments synchronously, but give the runtime + // a brief moment in case other threads still hold references in flight. + Collection snapshot = metricReader.collectAllMetrics(); + + MetricData pending = findMetric(snapshot, "ydb.query.session.pending_requests"); Assert.assertNotNull("ydb.query.session.pending_requests metric not found", pending); Assert.assertEquals("{request}", pending.getUnit()); long pendingTotal = pending.getLongSumData().getPoints().stream() .mapToLong(LongPointData::getValue).sum(); Assert.assertTrue("pending_requests must be > 0", pendingTotal > 0); - MetricData timeouts = findMetric("ydb.query.session.timeouts"); + MetricData timeouts = findMetric(snapshot, "ydb.query.session.timeouts"); Assert.assertNotNull("ydb.query.session.timeouts metric not found", timeouts); Assert.assertEquals("{timeout}", timeouts.getUnit()); long timeoutTotal = timeouts.getLongSumData().getPoints().stream() @@ -207,7 +215,10 @@ public void sessionPendingAndTimeoutsMetricsAreCounters() { } private MetricData findMetric(String name) { - Collection metrics = metricReader.collectAllMetrics(); + return findMetric(metricReader.collectAllMetrics(), name); + } + + private MetricData findMetric(Collection metrics, String name) { for (MetricData m : metrics) { if (name.equals(m.getName())) { return m;