Add logger to Agent (#51)

mateusz834 · web-flow · commit 8a18fe57b086 · 2026-02-17T14:34:10.000+01:00
+ propagate logs from local tools
+ add debug logs
diff --git a/splunklib/ai/README.md b/splunklib/ai/README.md
@@ -196,6 +196,8 @@ def tool(ctx: ToolContext) -> None:
 In this example, the `Logger` instance is accessed via `ctx.logger` and used to emit an informational
 log message during tool execution.
 
+These logs are forwarded to the `logger` passed to the `Agent` constructor.
+
 ### Tool filtering
 
 Tools can be filtered, before these are made available to the LLM, via the `tool_filters` parameter.
@@ -514,6 +516,35 @@ condition (`TokenLimitExceededException`, `StepsLimitExceededException` or `Time
 
 These limits apply over the entire lifetime of an `Agent`.
 
+## Logger
+
+The `Agent` constructor accepts an optional `logger` parameter that enables detailed
+tracing and debugging throughout the agent’s lifecycle.
+
+```py
+from splunklib.ai import Agent, OpenAIModel
+from splunklib.ai.hooks import token_limit, step_limit, timeout_limit
+from splunklib.client import connect
+import logging
+
+model = OpenAIModel(...)
+service = connect(...)
+
+logger = logging.getLogger("test")
+logger.setLevel(logging.DEBUG)
+
+async with Agent(
+        model=model,
+        service=service,
+        system_prompt="..." ,
+        logger=logger,
+    ) as agent: ...
+```
+
+The agent emits debug-level logs for events such as model interactions, tool calls, and subagent calls.
+
+Additionally, logs emitted by local tools are forwarded to this logger.
+
 ## Known issues
 
 ### CA - File not found
diff --git a/splunklib/ai/agent.py b/splunklib/ai/agent.py
@@ -13,6 +13,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+from logging import Logger
 import os
 from collections.abc import Sequence
 from typing import Self, final, override
@@ -99,15 +100,17 @@ class Agent(BaseAgent[OutputT]):
             Description of the agent when used as a subagent. This is
             surfaced to the supervisor and used to decide whether this agent
             is appropriate for a given task. Ignored for top-level agents.
+
+        logger:
+            Optional logger instance used for tracing and debugging the agent’s execution.
+            Additionally logs from the local tools are forwarded to this logger.
     """
 
     _impl: AgentImpl[OutputT] | None
     _use_mcp_tools: bool
     _service: Service
     _tool_filters: ToolFilters | None
 
-    # TODO: We should have a logger inside of an agent, debugging and such.
-
     def __init__(
         self,
         model: PredefinedModel,
@@ -121,6 +124,7 @@ def __init__(
         hooks: Sequence[AgentHook] | None = None,
         name: str = "",  # Only used by Subgents
         description: str = "",  # Only used by Subagents
+        logger: Logger | None = None,
     ) -> None:
         super().__init__(
             model=model,
@@ -131,6 +135,7 @@ def __init__(
             input_schema=input_schema,
             output_schema=output_schema,
             hooks=hooks,
+            logger=logger,
         )
 
         if duplicate_hook_names := _find_duplicate_hook_names(self.hooks):
@@ -145,14 +150,27 @@ async def __aenter__(self) -> Self:
         if self._impl:
             raise AssertionError("Agent is already in `async with` context")
 
+        if self.name:
+            self.logger.debug(f"Creating agent {self.name}; trace_id={self.trace_id}")
+        else:
+            self.logger.debug(f"Creating agent; trace_id={self.trace_id}")
+
         if self._use_mcp_tools:
             self._tools = await _load_tools_from_mcp(
-                self._service, self._tool_filters, self.trace_id
+                self._service,
+                self._tool_filters,
+                self.trace_id,
+                self.logger,
             )
 
         backend = get_backend()
         self._impl = await backend.create_agent(self)
 
+        if self.name:
+            self.logger.debug(f"Agent {self.name} created; trace_id={self.trace_id}")
+        else:
+            self.logger.debug(f"Agent created; trace_id={self.trace_id}")
+
         return self
 
     async def __aexit__(self, exc_type, exc_value, traceback) -> None:  # noqa: ANN001  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
@@ -171,6 +189,7 @@ async def _load_tools_from_mcp(
     service: Service,
     filters: ToolFilters | None,
     trace_id: str,
+    logger: Logger,
 ) -> list[Tool]:
     local_tools_path = _testing_local_tools_path
     app_id = _testing_app_id
@@ -186,10 +205,16 @@ async def _load_tools_from_mcp(
     if not os.path.exists(local_tools_path):
         local_tools_path = None
 
-    mcp_tools = await load_mcp_tools(service, local_tools_path, app_id, trace_id)
+    mcp_tools = await load_mcp_tools(
+        service, local_tools_path, app_id, trace_id, logger
+    )
     if filters:
         return filter_tools(mcp_tools, filters)
 
+    logger.debug(
+        f"Tools loaded & filtered successfully; tools_after_filtering={[tool.name for tool in mcp_tools]}"
+    )
+
     return mcp_tools
 
 
diff --git a/splunklib/ai/base_agent.py b/splunklib/ai/base_agent.py
@@ -15,6 +15,7 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
+import logging
 import secrets
 from typing import Generic
 
@@ -37,6 +38,7 @@ class BaseAgent(Generic[OutputT], ABC):
     _output_schema: type[OutputT] | None = None
     _hooks: Sequence[AgentHook] | None = None
     _trace_id: str
+    _logger: logging.Logger
 
     def __init__(
         self,
@@ -49,6 +51,7 @@ def __init__(
         input_schema: type[BaseModel] | None = None,
         output_schema: type[OutputT] | None = None,
         hooks: Sequence[AgentHook] | None = None,
+        logger: logging.Logger | None = None,
     ) -> None:
         self._system_prompt = system_prompt
         self._model = model
@@ -61,9 +64,19 @@ def __init__(
         self._hooks = tuple(hooks) if hooks else ()
         self._trace_id = secrets.token_hex(16)  # 32 Hex characters
 
+        if logger is None:
+            # Create a no-op logger to skip checking for its existence.
+            logger = logging.Logger(name="fake", level=logging.CRITICAL + 100)
+            assert len(logger.handlers) == 0
+        self._logger = logger
+
     @abstractmethod
     async def invoke(self, messages: list[BaseMessage]) -> AgentResponse[OutputT]: ...
 
+    @property
+    def logger(self) -> logging.Logger:
+        return self._logger
+
     @property
     def system_prompt(self) -> str:
         return self._system_prompt
diff --git a/splunklib/ai/engines/langchain.py b/splunklib/ai/engines/langchain.py
@@ -13,16 +13,18 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import logging
 import uuid
 from collections.abc import Sequence
 from dataclasses import dataclass
 from functools import partial
 from time import monotonic
-from typing import Any, cast, override
+from typing import Any, Awaitable, Callable, cast, override
 
 from langchain.agents import create_agent
 from langchain.agents.middleware import (
     AgentMiddleware as LC_AgentMiddleware,
+    wrap_tool_call,
 )
 from langchain.agents.middleware import (
     AgentState as LC_AgentState,
@@ -40,13 +42,15 @@
 from langchain.messages import ToolCall as LC_ToolCall
 from langchain.messages import ToolMessage as LC_ToolMessage
 from langchain.tools import ToolException as LC_ToolException
+from langchain.tools.tool_node import ToolCallRequest as LC_ToolCallRequest
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages.base import BaseMessage as LC_BaseMessage
 from langchain_core.messages.utils import count_tokens_approximately
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.checkpoint.memory import InMemorySaver
 from langgraph.graph.state import CompiledStateGraph, RunnableConfig
 from langgraph.runtime import Runtime
+from langgraph.types import Command as LC_Command
 
 from splunklib.ai.base_agent import BaseAgent
 from splunklib.ai.core.backend import (
@@ -58,9 +62,8 @@
 from splunklib.ai.hooks import (
     AgentHook,
     AgentState,
-    StepsLimitExceededException,
-    TimeoutExceededException,
-    TokenLimitExceededException,
+    after_model as hook_after_model,
+    before_model as hook_before_model,
 )
 from splunklib.ai.messages import (
     AgentCall,
@@ -192,12 +195,28 @@ async def create_agent(
 
                 system_prompt = AGENT_AS_TOOLS_PROMPT + "\n" + system_prompt
 
-        middleware = []
+        before_user_hooks, after_user_hooks, before_user_lc_middlewares = (
+            _debugging_middleware(agent.logger)
+        )
+
+        middleware = [
+            _convert_hook_to_middleware(h, model_impl) for h in before_user_hooks
+        ]
+        middleware.extend(before_user_lc_middlewares)
+
+        # User-provided hooks go in between our hooks.
         if agent.hooks:
             middleware.extend(
-                (_convert_hook_to_middleware(h, model_impl) for h in agent.hooks)
+                (
+                    _convert_hook_to_middleware(h, model_impl, logger=agent.logger)
+                    for h in agent.hooks
+                )
             )
 
+        middleware.extend(
+            (_convert_hook_to_middleware(h, model_impl) for h in after_user_hooks)
+        )
+
         return LangChainAgentImpl(
             system_prompt=system_prompt,
             model=model_impl,
@@ -207,6 +226,73 @@ async def create_agent(
         )
 
 
+def _debugging_middleware(
+    logger: logging.Logger,
+) -> tuple[list[AgentHook], list[AgentHook], list[LC_AgentMiddleware]]:
+    """Build the debug-logging hooks and middleware that write to `logger`.
+
+    Returns `(before_user_hooks, after_user_hooks, before_user_lc_middlewares)`;
+    the first and third are meant to be registered ahead of user-provided
+    hooks and the second after them.
+    """
+    # TODO: These names can conflict with user-provided names.
+
+    # TODO: replace this with our own middleware, once we add it.
+    @wrap_tool_call  # pyright: ignore[reportArgumentType, reportCallIssue, reportUntypedFunctionDecorator]
+    async def _tool_call(
+        request: LC_ToolCallRequest,
+        handler: Callable[
+            [LC_ToolCallRequest], Awaitable[LC_ToolMessage | LC_Command[None]]
+        ],
+    ) -> LC_ToolMessage | LC_Command[None]:
+        call = _map_tool_call_from_langchain(request.tool_call)
+        tool_or_agent = "Agent" if isinstance(call, AgentCall) else "Tool"
+
+        logger.debug(f"{tool_or_agent} call {call.name} started; id={call.id}")
+        try:
+            result = await handler(request)
+        except Exception:
+            logger.debug(f"{tool_or_agent} call {call.name} failed; id={call.id}")
+            raise
+
+        # Only a tool message carries a status; any other result (a command)
+        # is logged as a success instead of tripping an assertion here.
+        status = getattr(result, "status", "success")
+        outcome = "succeeded" if status == "success" else "failed"
+        logger.debug(f"{tool_or_agent} call {call.name} {outcome}; id={call.id}")
+        return result
+
+    before_user_lc_middlewares = [_tool_call]
+
+    @hook_after_model
+    def _debug_after_model(state: AgentState) -> None:
+        last = state.response.messages[-1]
+        if isinstance(last, AIMessage):
+            tool_calls = [
+                (call.name, call.id)
+                for call in last.calls
+                if isinstance(call, ToolCall)
+            ]
+            subagent_calls = [
+                (call.name, call.id)
+                for call in last.calls
+                if isinstance(call, AgentCall)
+            ]
+            logger.debug(
+                f"LLM model invocation ended; requested_tool_calls={tool_calls}; requested_subagent_calls={subagent_calls}"
+            )
+
+    before_user_hooks = [_debug_after_model]
+
+    @hook_before_model
+    def _debug_before_model(state: AgentState) -> None:
+        logger.debug("Invoking LLM model")
+
+    after_user_hooks = [_debug_before_model]
+
+    return before_user_hooks, after_user_hooks, before_user_lc_middlewares  # pyright: ignore[reportReturnType]
+
+
 def _create_langchain_tool(tool: Tool) -> BaseTool:
     async def _tool_call(
         **kwargs: dict[str, Any],
@@ -389,6 +475,7 @@ def _map_message_to_langchain(message: BaseMessage) -> LC_BaseMessage:
 def _convert_hook_to_middleware(
     hook: AgentHook,
     model: BaseChatModel,
+    logger: logging.Logger | None = None,
 ) -> LC_AgentMiddleware:
     match hook.type:
         case "before_model":
@@ -414,6 +501,10 @@ def _middleware(state: LC_AgentState, runtime: Runtime) -> dict[str, Any] | None
         # the token counting function as part of the Backend interface, so that
         # it's only used when needed instead.
         sdk_state = _convert_agent_state_from_langchain(state, model)
+
+        if logger:
+            logger.debug(f"Executing {hook.type} hook {hook.name}")
+
         hook(sdk_state)
 
     return wrapper(_middleware)
diff --git a/splunklib/ai/messages.py b/splunklib/ai/messages.py
@@ -27,14 +27,14 @@
 class ToolCall:
     name: str
     args: dict[str, Any]
-    id: str | None
+    id: str | None  # TODO: can be None?
 
 
 @dataclass(frozen=True)
 class AgentCall:
     name: str
     args: dict[str, Any]
-    id: str | None
+    id: str | None  # TODO: can be None?
 
 
 @dataclass(frozen=True)
diff --git a/splunklib/ai/registry.py b/splunklib/ai/registry.py
@@ -179,6 +179,9 @@ def service(self) -> Service:
     def logger(self) -> Logger:
         """
         This logger can be used by tools to emit logs during execution of a tool.
+
+        Logs emitted using this logger are forwarded to the logger
+        provided to the agent constructor.
         """
         assert self._logger is not None
         return self._logger
diff --git a/splunklib/ai/tools.py b/splunklib/ai/tools.py
diff --git a/tests/integration/ai/test_agent_logger.py b/tests/integration/ai/test_agent_logger.py
diff --git a/tests/integration/ai/testdata/weather_with_logs.py b/tests/integration/ai/testdata/weather_with_logs.py