diff --git a/.tekton/integration-tests/README.md b/.tekton/integration-tests/README.md
new file mode 100644
index 000000000..4f8737600
--- /dev/null
+++ b/.tekton/integration-tests/README.md
@@ -0,0 +1,38 @@
+# Konflux Integration Tests
+
+## Pipelines
+
+### OpenAI E2E (`lightspeed-stack-integration-test.yaml`)
+
+Standard e2e test pipeline using OpenAI as the inference provider.
+
+### RHEL AI E2E (`lightspeed-stack-rhelai-test.yaml`)
+
+E2e test pipeline using a RHEL AI instance (vLLM) as the inference provider.
+
+## RHEL AI Pipeline — MAPT
+
+[MAPT](https://github.com/redhat-developer/mapt) (Multi-Architecture Provisioning Tool) provisions bare-metal-like cloud instances with GPU support. We use it to spin up RHEL AI instances on AWS with vLLM pre-configured, since the e2e tests need a real GPU-backed inference endpoint that OpenShift ephemeral clusters don't provide.
+
+## RHEL AI Pipeline — S3 State Bucket
+
+MAPT uses an S3 bucket to store Pulumi state for provisioning RHEL AI instances. Each pipeline run creates its own prefix under the bucket (`mapt/rhel-ai/<pipelinerun-name>/`) so concurrent runs don't conflict.
+
+The bucket has a **31-day lifecycle rule** that auto-deletes stale state files left behind by failed runs. MAPT's `CleanupState` also removes the state after a successful destroy, but the destroy step does not always run if the pipeline is interrupted.
+
+## RHEL AI Instance Provisioning
+
+The pipeline supports two provisioning modes, controlled by the `spot` pipeline parameter (default: `true`):
+
+- **Spot (default):** MAPT searches all AWS regions for the cheapest spot instance across multiple GPU instance types (g5.12xlarge, g6.12xlarge, g5.24xlarge, g6.24xlarge). Cheaper (~$2-4/hr) but instances can be evicted.
+- **On-demand (`spot: false`):** Tries on-demand provisioning sequentially across regions (us-east-1, us-east-2, us-west-2, eu-west-1, eu-central-1, ap-northeast-1) with a 10-minute timeout per attempt. More expensive (~$5-6/hr) but guaranteed capacity once found.
+
+The default model is `meta-llama/Llama-3.1-8B-Instruct`, served with a 131072-token context window. The VRAM requirement depends on the combination of model size and context window — changing either of these may require a different instance type. All configured instance types provide 96GB+ total VRAM across 4 GPUs.
+
+## RHEL AI AMI Version
+
+The pipeline defaults to RHEL AI version `3.4.0` (GA). This corresponds to a specific AWS AMI that MAPT looks up by name pattern (`rhel-ai-cuda-aws-3.4.0*`). When a new RHEL AI version is released, update the `rhelai-version` default in the pipeline YAML to use the new AMI. Available versions can be listed with:
+
+```bash
+mapt aws rhel-ai list-versions
+```
\ No newline at end of file
diff --git a/.tekton/integration-tests/pipeline/lightspeed-stack-rhelai-test.yaml b/.tekton/integration-tests/pipeline/lightspeed-stack-rhelai-test.yaml
new file mode 100644
index 000000000..26a2a95ad
--- /dev/null
+++ b/.tekton/integration-tests/pipeline/lightspeed-stack-rhelai-test.yaml
@@ -0,0 +1,489 @@
+---
+kind: Pipeline
+apiVersion: tekton.dev/v1beta1
+metadata:
+  name: lightspeed-stack-rhelai-tests-pipeline
+spec:
+  description: |
+    This pipeline provisions a RHEL AI instance on AWS (vLLM), an ephemeral
+    OpenShift cluster, deploys lightspeed-stack configured to use the RHEL AI
+    vLLM as inference provider, and runs the full e2e test suite.
+  params:  # pipeline-level inputs; each one carries a default
+    - name: SNAPSHOT
+      type: string
+      description: "The JSON string representing the snapshot of the application under test."
+      default: '{"components": [{"name":"lightspeed-stack", "containerImage": "quay.io/example/lightspeed-stack:latest"}]}'
+    - name: test-name
+      description: "The name of the test corresponding to a defined Konflux integration test."
+      default: "lightspeed-stack-rhelai-tests"
+    - name: rhelai-version
+      type: string
+      description: "RHEL AI version to provision."
+      default: "3.4.0"
+    - name: instance-type
+      type: string
+      description: "Comma-separated list of EC2 instance types for MAPT."
+      default: "g5.12xlarge,g6.12xlarge,g5.24xlarge,g6.24xlarge"
+    - name: spot  # compared against the literal string "true" by provision-rhelai
+      type: string
+      description: "Use spot instances (cheaper, searches all regions). If false, uses on-demand with region fallback."
+      default: "true"
+    - name: model
+      type: string
+      description: "HuggingFace model to serve on RHEL AI."
+      default: "meta-llama/Llama-3.1-8B-Instruct"
+    - name: namespace
+      description: "Namespace to run tests in"
+      default: "lightspeed-stack"
+    - name: debug
+      type: string
+      description: "Enable debug output."
+      default: "false"
+  tasks:
+    # ── RHEL AI provisioning (MAPT) ──
+    - name: provision-rhelai
+      description: Provision a RHEL AI instance with vLLM auto-started.
+      timeout: 2h
+      runAfter:
+        - provision-cluster
+      taskSpec:
+        params:
+          - name: rhelai-version
+          - name: instance-type
+          - name: model
+          - name: debug
+          - name: spot
+        results:
+          - name: host
+            description: "RHEL AI instance hostname or IP"
+          - name: username
+            description: "SSH username for the instance"
+          - name: vllm-api-key
+            description: "Random API key for vLLM authentication"
+        volumes:
+          - name: aws-credentials
+            secret:
+              secretName: aws-mapt-credentials
+          - name: host-info  # scratch dir that receives MAPT's connection details
+            emptyDir: {}
+          - name: oidc-token  # token file referenced by AWS_WEB_IDENTITY_TOKEN_FILE
+            projected:
+              sources:
+                - serviceAccountToken:
+                    audience: sts.amazonaws.com
+                    expirationSeconds: 3600
+                    path: token
+        steps:
+          - name: create-instance
+            # TODO(are-ces): pin to v0.14.2 once released
+            image: quay.io/redhat-developer/mapt:v1.0.0-dev
+            imagePullPolicy: Always
+            env:
+              - name: HOME
+                value: /opt/mapt/run
+              - name: AWS_WEB_IDENTITY_TOKEN_FILE
+                value: /var/run/secrets/oidc/token
+              - name: AWS_ROLE_ARN
+                valueFrom:
+                  secretKeyRef:
+                    name: aws-mapt-credentials
+                    key: AWS_ROLE_ARN
+            volumeMounts:
+              - name: aws-credentials
+                mountPath: /opt/aws-credentials
+                readOnly: true
+              - name: host-info
+                mountPath: /opt/host-info
+              - name: oidc-token
+                mountPath: /var/run/secrets/oidc
+                readOnly: true
+            resources:
+              requests:
+                cpu: "100m"
+                memory: "1Gi"
+              limits:
+                cpu: "300m"
+                memory: "2Gi"
+            script: |
+              #!/bin/bash
+              # Provisions a RHEL AI instance with MAPT and publishes host, SSH username
+              # and the generated vLLM API key as task results. Needs bash (arrays, [[ ]]).
+              set -euo pipefail
+
+              BUCKET=$(cat /opt/aws-credentials/S3_BUCKET)
+              RUN_ID="$(context.pipelineRun.name)"
+              HF_TOKEN=$(cat /opt/aws-credentials/HUGGING_FACE_HUB_TOKEN)
+              VLLM_API_KEY=$(head -c 32 /dev/urandom | od -An -tx1 | tr -d ' \n')
+
+              echo "[mapt] ========== Version Info =========="
+              echo "[mapt] RHEL AI AMI version: $(params.rhelai-version)"
+              echo "[mapt] Model: $(params.model)"
+              echo "[mapt] Run ID: ${RUN_ID}"
+              echo "[mapt] Spot: $(params.spot)"
+              echo "[mapt] =================================="
+
+              # Arguments shared by the spot and on-demand paths, defined once so the
+              # two modes cannot drift apart.
+              CREATE_ARGS=(
+                --project-name "mapt-rhel-ai-${RUN_ID}"
+                --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}"
+                --conn-details-output /opt/host-info
+                --compute-sizes "$(params.instance-type)"
+                --version "$(params.rhelai-version)"
+                --auto-start
+                --model "$(params.model)"
+                --hf-token "${HF_TOKEN}"
+                --api-key "${VLLM_API_KEY}"
+                --expose-ports 8000
+                --vllm-extra-args "--max-model-len 131072 --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template /opt/app-root/template/tool_chat_template_llama3.1_json.jinja"
+                --tags "project=lightspeed-core,environment=konflux-ci"
+              )
+
+              CREATED=0
+              if [[ "$(params.spot)" == "true" ]]; then
+                export AWS_DEFAULT_REGION="us-east-1"
+                echo "[mapt] Using spot instances (searching all regions)..."
+                if mapt aws rhel-ai create "${CREATE_ARGS[@]}" --spot --spot-eviction-tolerance highest; then
+                  CREATED=1
+                else
+                  echo "[mapt] ERROR: Spot instance creation failed"
+                  exit 1
+                fi
+              else
+                REGIONS="us-east-1 us-east-2 us-west-2 eu-west-1 eu-central-1 ap-northeast-1"
+                TIMEOUT=600
+
+                for REGION in $REGIONS; do
+                  echo "[mapt] Trying on-demand in ${REGION}..."
+                  export AWS_DEFAULT_REGION="$REGION"
+                  if timeout "$TIMEOUT" mapt aws rhel-ai create "${CREATE_ARGS[@]}"; then
+                    CREATED=1
+                    break
+                  fi
+
+                  # Best-effort removal of whatever the failed attempt left behind.
+                  echo "[mapt] Failed in ${REGION}, cleaning up and trying next..."
+                  mapt aws rhel-ai destroy \
+                      --project-name "mapt-rhel-ai-${RUN_ID}" \
+                      --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}" \
+                      --force-destroy 2>/dev/null || true
+                done
+
+                if [ "$CREATED" -ne 1 ]; then
+                  echo "[mapt] ERROR: Failed to create instance in any region"
+                  exit 1
+                fi
+              fi
+
+              # Fail here rather than hand an empty host or username to later tasks.
+              HOST=$(cat /opt/host-info/host 2>/dev/null || true)
+              SSH_USER=$(cat /opt/host-info/username 2>/dev/null || true)
+              if [ -z "$HOST" ] || [ -z "$SSH_USER" ]; then
+                echo "[mapt] ERROR: MAPT did not write connection details to /opt/host-info"
+                exit 1
+              fi
+              echo "[mapt] Instance created and vLLM started."
+              printf '%s' "${VLLM_API_KEY}" > $(results.vllm-api-key.path)
+              printf '%s' "${HOST}" > $(results.host.path)
+              printf '%s' "${SSH_USER}" > $(results.username.path)
+              echo "[mapt] Host: ${HOST}"
+              echo "[mapt] User: ${SSH_USER}"
+      params:
+        - name: rhelai-version
+          value: "$(params.rhelai-version)"
+        - name: instance-type
+          value: "$(params.instance-type)"
+        - name: model
+          value: "$(params.model)"
+        - name: debug
+          value: "$(params.debug)"
+        - name: spot
+          value: "$(params.spot)"
+
+    # ── Ephemeral OpenShift cluster (mirrors the existing e2e pipeline) ──
+    - name: eaas-provision-space
+      params:
+        - name: ownerKind
+          value: "PipelineRun"
+        - name: ownerName
+          value: "$(context.pipelineRun.name)"
+        - name: ownerUid
+          value: "$(context.pipelineRun.uid)"
+      taskRef:  # task definition is fetched from git at run time
+        resolver: git
+        params:
+          - name: url
+            value: "https://github.com/konflux-ci/build-definitions.git"
+          - name: revision
+            value: "main"
+          - name: pathInRepo
+            value: "task/eaas-provision-space/0.1/eaas-provision-space.yaml"
+
+    - name: provision-cluster
+      runAfter:
+        - eaas-provision-space
+      taskSpec:
+        steps:
+          - name: pick-version  # resolves a concrete version from the "4.19." prefix
+            params:
+              - name: prefix
+                value: "4.19."
+            ref:
+              resolver: git
+              params:
+                - name: url
+                  value: "https://github.com/konflux-ci/build-definitions.git"
+                - name: revision
+                  value: "main"
+                - name: pathInRepo
+                  value: "stepactions/eaas-get-latest-openshift-version-by-prefix/0.1/eaas-get-latest-openshift-version-by-prefix.yaml"
+          - name: create-cluster  # its clusterName result is re-exported by the task
+            params:
+              - name: eaasSpaceSecretRef
+                value: "$(tasks.eaas-provision-space.results.secretRef)"
+              - name: version
+                value: "$(steps.pick-version.results.version)"
+              - name: instanceType
+                value: "m5.large"
+            ref:
+              resolver: git
+              params:
+                - name: url
+                  value: "https://github.com/konflux-ci/build-definitions.git"
+                - name: revision
+                  value: "main"
+                - name: pathInRepo
+                  value: "stepactions/eaas-create-ephemeral-cluster-hypershift-aws/0.1/eaas-create-ephemeral-cluster-hypershift-aws.yaml"
+        results:
+          - name: clusterName
+            value: "$(steps.create-cluster.results.clusterName)"
+
+    - name: get-stack-images
+      description: Extract lightspeed-stack image and commit from SNAPSHOT
+      params:
+        - name: SNAPSHOT
+          value: "$(params.SNAPSHOT)"
+      runAfter:
+        - provision-cluster
+      taskSpec:
+        params:
+          - name: SNAPSHOT
+        results:  # task results simply forward the step results below
+          - name: commit
+            value: "$(steps.get-stack-images.results.commit)"
+          - name: lightspeed-stack-image
+            value: "$(steps.get-stack-images.results.lightspeed-stack-image)"
+        steps:
+          - name: get-stack-images
+            image: registry.redhat.io/openshift4/ose-cli:latest
+            results:
+              - name: commit
+                type: string
+              - name: lightspeed-stack-image
+                type: string
+            env:  # SNAPSHOT is handed to jq through the environment
+              - name: SNAPSHOT
+                value: "$(params.SNAPSHOT)"
+            script: |
+              dnf -y install jq
+              echo -n "$(jq -r --arg n "lightspeed-stack" '.components[] | select(.name == $n) | .source.git.revision // "latest"' <<< "$SNAPSHOT")" > $(step.results.commit.path)
+              echo -n "$(jq -r --arg n "lightspeed-stack" '.components[] | select(.name == $n) | .containerImage // ""' <<< "$SNAPSHOT")" > $(step.results.lightspeed-stack-image.path)
+
+    # ── Full E2E suite; starts after provision-rhelai and get-stack-images ──
+    - name: rhelai-e2e-tests
+      description: Run full e2e test suite with RHEL AI vLLM as inference provider
+      timeout: 3h
+      runAfter:
+        - provision-rhelai
+        - get-stack-images
+      params:
+        - name: SNAPSHOT
+          value: "$(params.SNAPSHOT)"
+        - name: lightspeedstackimage
+          value: "$(tasks.get-stack-images.results.lightspeed-stack-image)"
+        - name: commit
+          value: "$(tasks.get-stack-images.results.commit)"
+        - name: namespace
+          value: "$(params.namespace)"
+        - name: spaceRequestSecretName
+          value: "$(tasks.eaas-provision-space.results.secretRef)"
+        - name: clusterName
+          value: "$(tasks.provision-cluster.results.clusterName)"
+        - name: vllm-host
+          value: "$(tasks.provision-rhelai.results.host)"
+        - name: vllm-api-key
+          value: "$(tasks.provision-rhelai.results.vllm-api-key)"
+        - name: model
+          value: "$(params.model)"
+      taskSpec:
+        volumes:
+          - name: openai-api-key
+            secret:
+              secretName: openai-api-key
+          - name: quay-aipcc-name
+            secret:
+              secretName: quay-aipcc-name
+          - name: quay-aipcc-password
+            secret:
+              secretName: quay-aipcc-password
+          - name: credentials  # shared between get-kubeconfig and run-e2e-tests
+            emptyDir: {}
+        params:
+          - name: SNAPSHOT
+          - name: lightspeedstackimage
+          - name: commit
+          - name: namespace
+            type: string
+          - name: spaceRequestSecretName
+            type: string
+          - name: clusterName
+            type: string
+          - name: vllm-host
+            type: string
+          - name: vllm-api-key
+            type: string
+          - name: model
+            type: string
+        steps:
+          - name: get-kubeconfig
+            params:
+              - name: eaasSpaceSecretRef
+                value: "$(params.spaceRequestSecretName)"
+              - name: clusterName
+                value: "$(params.clusterName)"
+              - name: credentials
+                value: "credentials"
+            ref:
+              resolver: git
+              params:
+                - name: url
+                  value: "https://github.com/konflux-ci/build-definitions.git"
+                - name: revision
+                  value: "main"
+                - name: pathInRepo
+                  value: "stepactions/eaas-get-ephemeral-cluster-credentials/0.1/eaas-get-ephemeral-cluster-credentials.yaml"
+          - name: run-e2e-tests
+            # Intentionally no `onError: continue`: a non-zero test exit must fail the task.
+            resources:
+              requests:
+                cpu: '1'
+                memory: 1Gi
+              limits:
+                memory: 10Gi
+            volumeMounts:
+              - name: openai-api-key
+                mountPath: /var/run/openai
+              - name: quay-aipcc-name
+                mountPath: /var/run/quay-aipcc-name
+              - name: quay-aipcc-password
+                mountPath: /var/run/quay-aipcc-password
+              - name: credentials
+                mountPath: /credentials
+            env:
+              - name: KUBECONFIG  # file name comes from the get-kubeconfig step result
+                value: "/credentials/$(steps.get-kubeconfig.results.kubeconfig)"
+              - name: ARTIFACT_DIR
+                value: "/workspace/artifacts"
+              - name: KONFLUX_BOOL
+                value: "true"
+              - name: LIGHTSPEED_STACK_IMAGE
+                value: "$(params.lightspeedstackimage)"
+              - name: NAMESPACE
+                value: "$(params.namespace)"
+              - name: SNAPSHOT
+                value: $(params.SNAPSHOT)
+              - name: VLLM_URL  # full base URL (scheme, host and port 8000)
+                value: "http://$(params.vllm-host):8000"
+              - name: VLLM_API_KEY
+                value: "$(params.vllm-api-key)"
+              - name: VLLM_MODEL
+                value: "$(params.model)"
+            image: registry.access.redhat.com/ubi9/ubi-minimal
+            script: |
+              set +e  # keep going after errors; the step exits with pipeline-konflux.sh's status
+              echo "[e2e] 1/8 Starting RHEL AI e2e tests"
+              echo "[e2e] 2/8 Installing deps..."
+              microdnf -y install git tar jq curl-minimal python3 gettext
+              echo "[e2e] 3/8 Downloading oc client..."
+              curl -sL -o oc.tar.gz https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/latest-4.19/openshift-client-linux-amd64-rhel9.tar.gz
+              tar -xzf oc.tar.gz && chmod +x kubectl oc && mv oc kubectl /usr/local/bin/
+              echo "[e2e] 4/8 VLLM_URL=$VLLM_URL"
+              REPO_URL=$(jq -r '.components[] | select(.name == "lightspeed-stack") | .source.git.url // "https://github.com/lightspeed-core/lightspeed-stack.git"' <<< "$SNAPSHOT")
+              REPO_REV=$(jq -r '.components[] | select(.name == "lightspeed-stack") | .source.git.revision // "main"' <<< "$SNAPSHOT")
+              echo "[e2e] 5/8 Clone $REPO_URL @ $REPO_REV"
+              git clone -q "$REPO_URL" /workspace/lightspeed-stack
+              cd /workspace/lightspeed-stack && git fetch origin "$REPO_REV" && git checkout -q "$REPO_REV"
+              echo "[e2e] 6/8 Entering tests/e2e-prow/rhoai"
+              cd tests/e2e-prow/rhoai && chmod +x pipeline-konflux.sh
+              echo "[e2e] 7/8 Running pipeline-konflux.sh with RHEL AI configs..."
+              export LLAMA_STACK_CONFIG="$(cd ../../.. && pwd)/tests/e2e/configs/run-rhelai.yaml"
+              export LCS_CONFIG="$(cd ../../.. && pwd)/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml"
+              ./pipeline-konflux.sh
+              PIPELINE_EXIT=$?
+              echo "[e2e] 8/8 pipeline-konflux.sh exited with code $PIPELINE_EXIT"
+              exit $PIPELINE_EXIT
+
+  finally:
+    - name: destroy-rhelai
+      description: Tear down the RHEL AI instance regardless of pipeline outcome.
+      params:
+        - name: debug
+          value: "$(params.debug)"
+      taskSpec:
+        params:
+          - name: debug
+        volumes:
+          - name: aws-credentials
+            secret:
+              secretName: aws-mapt-credentials
+          - name: oidc-token  # token file referenced by AWS_WEB_IDENTITY_TOKEN_FILE
+            projected:
+              sources:
+                - serviceAccountToken:
+                    audience: sts.amazonaws.com
+                    expirationSeconds: 3600
+                    path: token
+        steps:
+          - name: destroy-instance
+            # TODO(are-ces): pin to v0.14.2 once released
+            image: quay.io/redhat-developer/mapt:v1.0.0-dev
+            imagePullPolicy: Always
+            env:
+              - name: HOME
+                value: /opt/mapt/run
+              - name: AWS_WEB_IDENTITY_TOKEN_FILE
+                value: /var/run/secrets/oidc/token
+              - name: AWS_ROLE_ARN
+                valueFrom:
+                  secretKeyRef:
+                    name: aws-mapt-credentials
+                    key: AWS_ROLE_ARN
+            volumeMounts:
+              - name: aws-credentials
+                mountPath: /opt/aws-credentials
+                readOnly: true
+              - name: oidc-token
+                mountPath: /var/run/secrets/oidc
+                readOnly: true
+            resources:
+              requests:
+                cpu: "100m"
+                memory: "1Gi"
+              limits:
+                cpu: "300m"
+                memory: "2Gi"
+            script: |
+              #!/bin/bash
+              # Best-effort teardown: the final `||` branch keeps the exit status at 0.
+              set -uo pipefail
+              export AWS_DEFAULT_REGION="us-east-1"
+              BUCKET=$(cat /opt/aws-credentials/S3_BUCKET)
+              RUN_ID="$(context.pipelineRun.name)"
+              # NOTE(review): on-demand runs may provision outside us-east-1; presumably MAPT resolves the region from the stored state — confirm.
+              echo "[mapt] Destroying RHEL AI instance (run: ${RUN_ID})..."
+              mapt aws rhel-ai destroy \
+                  --project-name "mapt-rhel-ai-${RUN_ID}" \
+                  --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}" \
+                  --force-destroy \
+              && echo "[mapt] Destroy completed." \
+              || echo "[mapt] WARNING: destroy failed or found nothing to destroy; check AWS for leftover resources (run: ${RUN_ID})."
diff --git a/examples/vllm-rhelai.yaml b/examples/vllm-rhelai.yaml
index c363a8b7b..4fa263387 100644
--- a/examples/vllm-rhelai.yaml
+++ b/examples/vllm-rhelai.yaml
@@ -12,7 +12,7 @@ apis:
 - scoring
 - tool_runtime
 - vector_io
-      
+
 benchmarks: []
 datasets: []
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
@@ -22,14 +22,15 @@ providers:
   - provider_id: vllm
     provider_type: remote::vllm
     config:
-      url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/
-      api_token: ${env.RHEL_AI_API_KEY}
+      base_url: ${env.VLLM_URL}/v1/
+      api_token: ${env.VLLM_API_KEY:=}
       tls_verify: false
       max_tokens: 2048
   - provider_id: openai
     provider_type: remote::openai
     config:
       api_key: ${env.OPENAI_API_KEY}
+      allowed_models: ["gpt-4o-mini"]
   - config: {}
     provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
@@ -47,20 +48,24 @@ providers:
     provider_id: llama-guard
     provider_type: inline::llama-guard
   scoring:
-  - config: {}
-    provider_id: basic
+  - provider_id: basic
     provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: '********'
   tool_runtime:
-  - config: {}
+  - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  vector_io: []
   agents:
   - config:
       persistence:
@@ -106,7 +111,7 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db}
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
@@ -127,24 +132,31 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: ${env.RHEL_AI_MODEL}
-    provider_id: vllm
+  - model_id: ${env.VLLM_MODEL}
     model_type: llm
-    provider_model_id: ${env.RHEL_AI_MODEL}
+    provider_id: vllm
+    provider_model_id: ${env.VLLM_MODEL}
+  - model_id: all-mpnet-base-v2
+    model_type: embedding
+    provider_id: sentence-transformers
+    provider_model_id: all-mpnet-base-v2
+    metadata:
+      embedding_dimension: 768
   shields:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
+  vector_stores: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
-  - toolgroup_id: builtin::rag
+  - toolgroup_id: builtin::rag # Register the RAG tool
     provider_id: rag-runtime
 vector_stores:
   default_provider_id: faiss
-  default_embedding_model:
+  default_embedding_model: # Define the default embedding model for RAG
     provider_id: sentence-transformers
-    model_id: nomic-ai/nomic-embed-text-v1.5
+    model_id: all-mpnet-base-v2
 safety:
   default_shield_id: llama-guard
diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml
index c20efec8c..04fae492a 100644
--- a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml
+++ b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml
@@ -30,6 +30,12 @@ spec:
               optional: true
         - name: KV_RAG_PATH
           value: "/app-root/src/.llama/storage/rag/kv_store.db"
+        - name: VLLM_MODEL
+          valueFrom:
+            secretKeyRef:
+              name: vllm-model-secret
+              key: key
+              optional: true
       image: ${LIGHTSPEED_STACK_IMAGE}
       ports:
         - containerPort: 8080
diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml
index 4a4cb261e..80651a3cb 100644
--- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml
+++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml
@@ -143,8 +143,27 @@ spec:
             secretKeyRef:
               name: openai-api-key-secret
               key: key
+              optional: true
         - name: E2E_OPENAI_MODEL
           value: "gpt-4o-mini"
+        - name: VLLM_URL
+          valueFrom:
+            secretKeyRef:
+              name: vllm-url-secret
+              key: key
+              optional: true
+        - name: VLLM_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: vllm-api-key-secret
+              key: key
+              optional: true
+        - name: VLLM_MODEL
+          valueFrom:
+            secretKeyRef:
+              name: vllm-model-secret
+              key: key
+              optional: true
         - name: FAISS_VECTOR_STORE_ID
           valueFrom:
             secretKeyRef:
diff --git a/tests/e2e-prow/rhoai/pipeline-konflux.sh b/tests/e2e-prow/rhoai/pipeline-konflux.sh
index d6deb13bc..c20cbbac0 100755
--- a/tests/e2e-prow/rhoai/pipeline-konflux.sh
+++ b/tests/e2e-prow/rhoai/pipeline-konflux.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-# Konflux / OpenAI integration E2E: Llama Stack run-from-source + tests/e2e/configs/run-ci.yaml.
+# Konflux integration E2E: Llama Stack run-from-source + configurable inference provider.
+# Default: OpenAI (run-ci.yaml). For RHEL AI vLLM: set LLAMA_STACK_CONFIG and LCS_CONFIG env vars.
 # Prow (vLLM) workflow uses pipeline.sh unchanged.
 set -euo pipefail
 trap 'echo "❌ Pipeline failed at line $LINENO"; exit 1' ERR
@@ -50,7 +51,10 @@ fi
 
 [[ -n "$QUAY_ROBOT_NAME" ]] && log "✅ QUAY_ROBOT_NAME is set" || { echo "❌ Missing QUAY_ROBOT_NAME"; exit 1; }
 [[ -n "$QUAY_ROBOT_PASSWORD" ]] && log "✅ QUAY_ROBOT_PASSWORD is set" || { echo "❌ Missing QUAY_ROBOT_PASSWORD"; exit 1; }
-[[ -n "$OPENAI_API_KEY" ]] && log "✅ OPENAI_API_KEY is set" || { echo "❌ Missing OPENAI_API_KEY"; exit 1; }
+[[ -n "${OPENAI_API_KEY:-}" ]] && log "✅ OPENAI_API_KEY is set" || { echo "❌ Missing OPENAI_API_KEY"; exit 1; }
+if [[ -n "${VLLM_URL:-}" ]]; then
+  log "✅ VLLM_URL is set: $VLLM_URL (RHEL AI mode)"
+fi
 
 # Basic info (skip when QUIET to keep Konflux UI focused on test logs)
 if [ "$QUIET" != "1" ]; then ls -A || true; oc version; oc whoami; fi
@@ -68,6 +72,11 @@ create_secret() {
 }
 
 create_secret openai-api-key-secret --from-literal=key="$OPENAI_API_KEY"
+if [[ -n "${VLLM_URL:-}" ]]; then
+  create_secret vllm-url-secret --from-literal=key="$VLLM_URL"
+  create_secret vllm-api-key-secret --from-literal=key="${VLLM_API_KEY:-}"
+  create_secret vllm-model-secret --from-literal=key="${VLLM_MODEL:-meta-llama/Llama-3.2-1B-Instruct}"
+fi
 
 # MCPFileAuth E2E: secret mounted at /tmp/mcp-token in LCS pod (same as docker-compose)
 if [ -f "$REPO_ROOT/tests/e2e/secrets/mcp-token" ]; then
@@ -162,13 +171,16 @@ spec:
 EOF
 log "✅ llama-stack-app-root PVC created"
 
-# Llama run config: single source with GitHub E2E (tests/e2e/configs/run-ci.yaml).
-# Lightspeed stack: same tree as local/docker E2E (tests/e2e/configuration/server-mode).
+# Configurable config paths: default to OpenAI, override for RHEL AI / vLLM.
+LLAMA_STACK_CONFIG="${LLAMA_STACK_CONFIG:-$REPO_ROOT/tests/e2e/configs/run-ci.yaml}"
+LCS_CONFIG="${LCS_CONFIG:-$REPO_ROOT/tests/e2e/configuration/server-mode/lightspeed-stack.yaml}"
+log "Llama Stack config: $LLAMA_STACK_CONFIG"
+log "LCS config: $LCS_CONFIG"
 oc create configmap llama-stack-config -n "$NAMESPACE" \
-  --from-file=run.yaml="$REPO_ROOT/tests/e2e/configs/run-ci.yaml" \
+  --from-file=run.yaml="$LLAMA_STACK_CONFIG" \
   --dry-run=client -o yaml | oc apply -f -
 oc create configmap lightspeed-stack-config -n "$NAMESPACE" \
-  --from-file=lightspeed-stack.yaml="$REPO_ROOT/tests/e2e/configuration/server-mode/lightspeed-stack.yaml" \
+  --from-file=lightspeed-stack.yaml="$LCS_CONFIG" \
   --dry-run=client -o yaml | oc apply -f -
 
 # Create RAG data ConfigMap from the e2e test RAG data
@@ -370,8 +382,13 @@ export E2E_LLAMA_PORT="8321"
 # Same pattern as tests/e2e-prow/rhoai/pipeline.sh and .github/workflows/e2e_tests_*.yaml:
 # Behave {MODEL}/{PROVIDER} use these when set; avoids wrong fallbacks if /v1/models
 # discovery in before_all is empty (matches run-ci.yaml openai + E2E_OPENAI_MODEL).
-: "${E2E_DEFAULT_PROVIDER_OVERRIDE:=openai}"
-: "${E2E_DEFAULT_MODEL_OVERRIDE:=${E2E_OPENAI_MODEL:-gpt-4o-mini}}"
+if [[ -n "${VLLM_URL:-}" ]]; then
+  : "${E2E_DEFAULT_PROVIDER_OVERRIDE:=vllm}"
+  : "${E2E_DEFAULT_MODEL_OVERRIDE:=${VLLM_MODEL:-meta-llama/Llama-3.2-1B-Instruct}}"
+else
+  : "${E2E_DEFAULT_PROVIDER_OVERRIDE:=openai}"
+  : "${E2E_DEFAULT_MODEL_OVERRIDE:=${E2E_OPENAI_MODEL:-gpt-4o-mini}}"
+fi
 export E2E_DEFAULT_PROVIDER_OVERRIDE E2E_DEFAULT_MODEL_OVERRIDE
 log "LCS accessible at: http://$E2E_LSC_HOSTNAME:8080"
 log "Mock JWKS accessible at: http://$E2E_JWKS_HOSTNAME:8000"
diff --git a/tests/e2e/configs/run-rhelai.yaml b/tests/e2e/configs/run-rhelai.yaml
index 0edbb4d76..4fa263387 100644
--- a/tests/e2e/configs/run-rhelai.yaml
+++ b/tests/e2e/configs/run-rhelai.yaml
@@ -22,15 +22,15 @@ providers:
   - provider_id: vllm
     provider_type: remote::vllm
     config:
-      url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/
-      api_token: ${env.RHEL_AI_API_KEY}
+      base_url: ${env.VLLM_URL}/v1/
+      api_token: ${env.VLLM_API_KEY:=}
       tls_verify: false
       max_tokens: 2048
   - provider_id: openai
     provider_type: remote::openai
     config:
       api_key: ${env.OPENAI_API_KEY}
-      allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"]
+      allowed_models: ["gpt-4o-mini"]
   - config: {}
     provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
@@ -132,10 +132,10 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: ${env.RHEL_AI_MODEL}
-    provider_id: vllm
+  - model_id: ${env.VLLM_MODEL}
     model_type: llm
-    provider_model_id: ${env.RHEL_AI_MODEL}
+    provider_id: vllm
+    provider_model_id: ${env.VLLM_MODEL}
   - model_id: all-mpnet-base-v2
     model_type: embedding
     provider_id: sentence-transformers
diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml
new file mode 100644
index 000000000..4313c7605
--- /dev/null
+++ b/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml
@@ -0,0 +1,35 @@
+name: "Lightspeed Core Service (LCS)"
+service:
+  host: "0.0.0.0"
+  port: 8080
+  workers: 1
+  auth_enabled: false
+  access_log: true
+  color_log: true
+llama_stack:
+  # Server mode: LCS talks to a separately running llama-stack service
+  url: http://${env.E2E_LLAMA_HOSTNAME}:8321
+  api_key: xyzzy
+  use_as_library_client: false
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: /tmp/data/feedback
+  transcripts_enabled: true
+  transcripts_storage: /tmp/data/transcripts
+authentication:
+  module: noop
+inference:
+  default_model: ${env.VLLM_MODEL}
+  default_provider: vllm
+byok_rag:
+  - rag_id: e2e-test-docs
+    rag_type: inline::faiss
+    vector_db_id: ${env.FAISS_VECTOR_STORE_ID}
+    db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db}
+    embedding_model: sentence-transformers/all-mpnet-base-v2
+    embedding_dimension: 768
+    score_multiplier: 1.0
+
+rag:
+  tool:
+    - e2e-test-docs