diff --git a/.tekton/integration-tests/README.md b/.tekton/integration-tests/README.md new file mode 100644 index 000000000..4f8737600 --- /dev/null +++ b/.tekton/integration-tests/README.md @@ -0,0 +1,38 @@ +# Konflux Integration Tests + +## Pipelines + +### OpenAI E2E (`lightspeed-stack-integration-test.yaml`) + +Standard e2e test pipeline using OpenAI as the inference provider. + +### RHEL AI E2E (`lightspeed-stack-rhelai-test.yaml`) + +E2E test pipeline using a RHEL AI instance (vLLM) as the inference provider. + +## RHEL AI Pipeline — MAPT + +[MAPT](https://github.com/redhat-developer/mapt) (Multi-Architecture Provisioning Tool) provisions bare-metal-like cloud instances with GPU support. We use it to spin up RHEL AI instances on AWS with vLLM pre-configured, since the e2e tests need a real GPU-backed inference endpoint that OpenShift ephemeral clusters don't provide. + +## RHEL AI Pipeline — S3 State Bucket + +MAPT uses an S3 bucket to store Pulumi state for provisioning RHEL AI instances. Each pipeline run creates its own prefix under the bucket (`mapt/rhel-ai/<pipelinerun-name>/`) so concurrent runs don't conflict. + +The bucket has a **31-day lifecycle rule** that auto-deletes stale state files from failed runs. MAPT's `CleanupState` also removes state after a successful destroy, but that cleanup may not run if the pipeline is interrupted. + +## RHEL AI Instance Provisioning + +The pipeline supports two provisioning modes, controlled by the `spot` pipeline parameter (default: `true`): + +- **Spot (default):** MAPT searches all AWS regions for the cheapest spot instance across multiple GPU instance types (g5.12xlarge, g6.12xlarge, g5.24xlarge, g6.24xlarge). Cheaper (~$2-4/hr) but instances can be evicted. +- **On-demand (`spot: false`):** Tries on-demand provisioning sequentially across regions (us-east-1, us-east-2, us-west-2, eu-west-1, eu-central-1, ap-northeast-1) with a 10-minute timeout per attempt. More expensive (~$5-6/hr) but guaranteed capacity once found. 
+ +The model used is `meta-llama/Llama-3.1-8B-Instruct` with a 131072-token context window. The VRAM requirement depends on the combination of model size and context window — changing either may require a different instance type. All configured instance types provide 96GB+ total VRAM across 4 GPUs. + +## RHEL AI AMI Version + +The pipeline defaults to RHEL AI version `3.4.0` (GA). This corresponds to a specific AWS AMI that MAPT looks up by name pattern (`rhel-ai-cuda-aws-3.4.0*`). When a new RHEL AI version is released, update the `rhelai-version` default in the pipeline YAML to use the new AMI. Available versions can be listed with: + +```bash +mapt aws rhel-ai list-versions +``` \ No newline at end of file diff --git a/.tekton/integration-tests/pipeline/lightspeed-stack-rhelai-test.yaml b/.tekton/integration-tests/pipeline/lightspeed-stack-rhelai-test.yaml new file mode 100644 index 000000000..26a2a95ad --- /dev/null +++ b/.tekton/integration-tests/pipeline/lightspeed-stack-rhelai-test.yaml @@ -0,0 +1,489 @@ +--- +apiVersion: tekton.dev/v1beta1 +kind: Pipeline +metadata: + name: lightspeed-stack-rhelai-tests-pipeline +spec: + description: | + This pipeline provisions a RHEL AI instance on AWS (vLLM), an ephemeral + OpenShift cluster, deploys lightspeed-stack configured to use the RHEL AI + vLLM as inference provider, and runs the full e2e test suite. + params: + - name: SNAPSHOT + description: 'The JSON string representing the snapshot of the application under test.' + default: '{"components": [{"name":"lightspeed-stack", "containerImage": "quay.io/example/lightspeed-stack:latest"}]}' + type: string + - name: test-name + description: 'The name of the test corresponding to a defined Konflux integration test.' + default: 'lightspeed-stack-rhelai-tests' + - name: rhelai-version + description: 'RHEL AI version to provision.' + default: '3.4.0' + type: string + - name: instance-type + description: 'Comma-separated list of EC2 instance types for MAPT.' 
+ default: 'g5.12xlarge,g6.12xlarge,g5.24xlarge,g6.24xlarge' + type: string + - name: spot + description: 'Use spot instances (cheaper, searches all regions). If false, uses on-demand with region fallback.' + default: 'true'  # the create-instance script compares this against the literal string "true" + type: string + - name: model + description: 'HuggingFace model to serve on RHEL AI.' + default: 'meta-llama/Llama-3.1-8B-Instruct' + type: string + - name: namespace + description: 'Namespace to run tests in' + default: 'lightspeed-stack' + - name: debug + description: 'Enable debug output.'  # NOTE(review): forwarded to provision-rhelai and destroy-rhelai, but neither script in this file reads it + default: 'false' + type: string + tasks: + # ── RHEL AI provisioning (MAPT) ── + - name: provision-rhelai + description: Provision a RHEL AI instance with vLLM auto-started. + timeout: 2h + runAfter: + - provision-cluster + taskSpec: + params: + - name: rhelai-version + - name: instance-type + - name: model + - name: debug + - name: spot + results: + - name: host + description: "RHEL AI instance hostname or IP" + - name: username + description: "SSH username for the instance"  # NOTE(review): no task in this pipeline consumes this result + - name: vllm-api-key + description: "Random API key for vLLM authentication" + volumes: + - name: aws-credentials + secret: + secretName: aws-mapt-credentials + - name: host-info + emptyDir: {}  # scratch space: mapt writes connection details here (--conn-details-output /opt/host-info) + - name: oidc-token  # service-account token for audience sts.amazonaws.com; its path is exported as AWS_WEB_IDENTITY_TOKEN_FILE + projected: + sources: + - serviceAccountToken: + audience: sts.amazonaws.com + expirationSeconds: 3600 + path: token + steps: + - name: create-instance + # TODO(are-ces): pin to v0.14.2 once released; until then the dev tag is re-pulled on every run (imagePullPolicy: Always) + image: quay.io/redhat-developer/mapt:v1.0.0-dev + imagePullPolicy: Always + env: + - name: HOME + value: /opt/mapt/run + - name: AWS_WEB_IDENTITY_TOKEN_FILE + value: /var/run/secrets/oidc/token + - name: AWS_ROLE_ARN + valueFrom: + secretKeyRef: + name: aws-mapt-credentials + key: AWS_ROLE_ARN + volumeMounts: + - name: aws-credentials + mountPath: /opt/aws-credentials + readOnly: true + - name: host-info + mountPath: /opt/host-info + - name: oidc-token + mountPath: /var/run/secrets/oidc + readOnly: true + resources: + requests: + cpu: "100m" + memory: "1Gi" + limits: + 
cpu: "300m" + memory: "2Gi" + script: | + #!/bin/sh + set -euo pipefail  # -e: stop immediately if a credential file below cannot be read + + BUCKET=$(cat /opt/aws-credentials/S3_BUCKET) + RUN_ID="$(context.pipelineRun.name)" + HF_TOKEN=$(cat /opt/aws-credentials/HUGGING_FACE_HUB_TOKEN) + VLLM_API_KEY=$(head -c 32 /dev/urandom | od -An -tx1 | tr -d ' \n')  # 32 random bytes rendered as 64 hex characters + + echo "[mapt] ========== Version Info ==========" + echo "[mapt] RHEL AI AMI version: $(params.rhelai-version)" + echo "[mapt] Model: $(params.model)" + echo "[mapt] Run ID: ${RUN_ID}" + echo "[mapt] Spot: $(params.spot)" + echo "[mapt] ==================================" + + CREATED=0 + + if [ "$(params.spot)" = "true" ]; then  # POSIX test, since the shebang is /bin/sh + export AWS_DEFAULT_REGION="us-east-1" + echo "[mapt] Using spot instances (searching all regions)..." + if mapt aws rhel-ai create \ + --project-name "mapt-rhel-ai-${RUN_ID}" \ + --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}" \ + --conn-details-output /opt/host-info \ + --compute-sizes "$(params.instance-type)" \ + --version "$(params.rhelai-version)" \ + --spot --spot-eviction-tolerance highest \ + --auto-start \ + --model "$(params.model)" \ + --hf-token "${HF_TOKEN}" \ + --api-key "${VLLM_API_KEY}" \ + --expose-ports 8000 \ + --vllm-extra-args "--max-model-len 131072 --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template /opt/app-root/template/tool_chat_template_llama3.1_json.jinja" \ + --tags "project=lightspeed-core,environment=konflux-ci"; then + CREATED=1 + else + echo "[mapt] ERROR: Spot instance creation failed" + exit 1 + fi + else + REGIONS="us-east-1 us-east-2 us-west-2 eu-west-1 eu-central-1 ap-northeast-1" + TIMEOUT=600  # seconds allowed per region attempt + + for REGION in $REGIONS; do + echo "[mapt] Trying on-demand in ${REGION}..." 
+ export AWS_DEFAULT_REGION="$REGION" + + if timeout "$TIMEOUT" mapt aws rhel-ai create \ + --project-name "mapt-rhel-ai-${RUN_ID}" \ + --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}" \ + --conn-details-output /opt/host-info \ + --compute-sizes "$(params.instance-type)" \ + --version "$(params.rhelai-version)" \ + --auto-start \ + --model "$(params.model)" \ + --hf-token "${HF_TOKEN}" \ + --api-key "${VLLM_API_KEY}" \ + --expose-ports 8000 \ + --vllm-extra-args "--max-model-len 131072 --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template /opt/app-root/template/tool_chat_template_llama3.1_json.jinja" \ + --tags "project=lightspeed-core,environment=konflux-ci"; then + CREATED=1 + break + fi + + echo "[mapt] Failed in ${REGION}, cleaning up and trying next..."  # best-effort cleanup; a failure is logged, not fatal + mapt aws rhel-ai destroy \ + --project-name "mapt-rhel-ai-${RUN_ID}" \ + --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}" \ + --force-destroy || echo "[mapt] WARN: cleanup in ${REGION} failed (continuing with next region)" + done + + if [ "$CREATED" -ne 1 ]; then  # every region failed or timed out + echo "[mapt] ERROR: Failed to create instance in any region" + exit 1 + fi + fi + + echo "[mapt] Instance created and vLLM started." 
+ echo -n "${VLLM_API_KEY}" > $(results.vllm-api-key.path)  # NOTE(review): Tekton keeps results in the TaskRun status, so this key is readable by anyone who can view the run — confirm acceptable + echo -n "$(cat /opt/host-info/host)" > $(results.host.path)  # host/username files come from --conn-details-output /opt/host-info + echo -n "$(cat /opt/host-info/username)" > $(results.username.path) + echo "[mapt] Host: $(cat /opt/host-info/host)" + echo "[mapt] User: $(cat /opt/host-info/username)" + params: + - name: rhelai-version + value: "$(params.rhelai-version)" + - name: instance-type + value: "$(params.instance-type)" + - name: model + value: "$(params.model)" + - name: debug + value: "$(params.debug)" + - name: spot + value: "$(params.spot)" + + # ── OpenShift cluster provisioning (same as existing e2e pipeline) ── + - name: eaas-provision-space + taskRef: + resolver: git + params: + - name: url + value: https://github.com/konflux-ci/build-definitions.git + - name: revision + value: main  # NOTE(review): follows the moving main branch rather than a pinned commit + - name: pathInRepo + value: task/eaas-provision-space/0.1/eaas-provision-space.yaml + params: + - name: ownerKind + value: PipelineRun + - name: ownerName + value: $(context.pipelineRun.name) + - name: ownerUid + value: $(context.pipelineRun.uid) + + - name: provision-cluster + runAfter: + - eaas-provision-space + taskSpec: + results: + - name: clusterName + value: "$(steps.create-cluster.results.clusterName)" + steps: + - name: pick-version + ref: + resolver: git + params: + - name: url + value: https://github.com/konflux-ci/build-definitions.git + - name: revision + value: main + - name: pathInRepo + value: stepactions/eaas-get-latest-openshift-version-by-prefix/0.1/eaas-get-latest-openshift-version-by-prefix.yaml + params: + - name: prefix + value: "4.19."  # same minor as the latest-4.19 oc client that run-e2e-tests downloads 
+ - name: create-cluster + ref: + resolver: git + params: + - name: url + value: https://github.com/konflux-ci/build-definitions.git + - name: revision + value: main + - name: pathInRepo + value: stepactions/eaas-create-ephemeral-cluster-hypershift-aws/0.1/eaas-create-ephemeral-cluster-hypershift-aws.yaml + params: + - name: eaasSpaceSecretRef + value: $(tasks.eaas-provision-space.results.secretRef) + - name: version + value: "$(steps.pick-version.results.version)"  # version chosen by the pick-version step above + - name: instanceType + value: "m5.large" + + - name: get-stack-images + description: Extract lightspeed-stack image and commit from SNAPSHOT + runAfter:  # NOTE(review): this task only parses SNAPSHOT; waiting for provision-cluster looks unnecessary — confirm + - provision-cluster + params: + - name: SNAPSHOT + value: $(params.SNAPSHOT) + taskSpec: + results: + - name: lightspeed-stack-image + value: "$(steps.get-stack-images.results.lightspeed-stack-image)" + - name: commit + value: "$(steps.get-stack-images.results.commit)" + params: + - name: SNAPSHOT + steps: + - name: get-stack-images + image: registry.redhat.io/openshift4/ose-cli:latest + env: + - name: SNAPSHOT + value: $(params.SNAPSHOT) + results: + - name: lightspeed-stack-image + type: string + - name: commit + type: string + script: | + dnf -y install jq  # jq parses $SNAPSHOT in the two lines below + echo -n "$(jq -r --arg n "lightspeed-stack" '.components[] | select(.name == $n) | .containerImage // ""' <<< "$SNAPSHOT")" > $(step.results.lightspeed-stack-image.path) + echo -n "$(jq -r --arg n "lightspeed-stack" '.components[] | select(.name == $n) | .source.git.revision // "latest"' <<< "$SNAPSHOT")" > $(step.results.commit.path)  # falls back to "latest" when the snapshot carries no git revision + + # ── Full E2E tests (runs after both RHEL AI and OpenShift are ready) ── + - name: rhelai-e2e-tests + description: Run full e2e test suite with RHEL AI vLLM as inference provider + timeout: 3h + runAfter: + - provision-rhelai + - get-stack-images + params: + - name: SNAPSHOT + value: $(params.SNAPSHOT) + - name: lightspeedstackimage + value: $(tasks.get-stack-images.results.lightspeed-stack-image) + - name: commit + value: 
$(tasks.get-stack-images.results.commit) + - name: namespace + value: "$(params.namespace)" + - name: spaceRequestSecretName + value: $(tasks.eaas-provision-space.results.secretRef) + - name: clusterName + value: $(tasks.provision-cluster.results.clusterName) + - name: vllm-host + value: "$(tasks.provision-rhelai.results.host)" + - name: vllm-api-key + value: "$(tasks.provision-rhelai.results.vllm-api-key)" + - name: model + value: "$(params.model)" + taskSpec: + params: + - name: SNAPSHOT + - name: lightspeedstackimage + - name: commit  # NOTE(review): not referenced by the steps visible in this task + - name: namespace + type: string + - name: spaceRequestSecretName + type: string + - name: clusterName + type: string + - name: vllm-host + type: string + - name: vllm-api-key + type: string + - name: model + type: string + volumes: + - name: openai-api-key + secret: + secretName: openai-api-key + - name: quay-aipcc-name + secret: + secretName: quay-aipcc-name + - name: quay-aipcc-password + secret: + secretName: quay-aipcc-password + - name: credentials + emptyDir: {}  # shared between get-kubeconfig (writer) and run-e2e-tests (mounted at /credentials) + steps: + - name: get-kubeconfig + ref: + resolver: git + params: + - name: url + value: https://github.com/konflux-ci/build-definitions.git + - name: revision + value: main + - name: pathInRepo + value: stepactions/eaas-get-ephemeral-cluster-credentials/0.1/eaas-get-ephemeral-cluster-credentials.yaml + params: + - name: eaasSpaceSecretRef + value: $(params.spaceRequestSecretName) + - name: clusterName + value: "$(params.clusterName)" + - name: credentials + value: credentials + - name: run-e2e-tests + onError: stopAndFail  # a failing test run must fail the task; "continue" records the exit code but still lets the TaskRun succeed + resources: + requests: + cpu: '1' + memory: 1Gi + limits: + memory: 10Gi + volumeMounts: + - name: openai-api-key + mountPath: /var/run/openai + - name: quay-aipcc-name + mountPath: /var/run/quay-aipcc-name + - name: quay-aipcc-password + mountPath: /var/run/quay-aipcc-password + - name: credentials + mountPath: /credentials + env: + - name: KUBECONFIG + value: "/credentials/$(steps.get-kubeconfig.results.kubeconfig)" + - name: ARTIFACT_DIR + 
value: "/workspace/artifacts" + - name: KONFLUX_BOOL + value: "true" + - name: LIGHTSPEED_STACK_IMAGE + value: "$(params.lightspeedstackimage)" + - name: NAMESPACE + value: "$(params.namespace)" + - name: SNAPSHOT + value: $(params.SNAPSHOT) + - name: VLLM_URL + value: "http://$(params.vllm-host):8000"  # port 8000 is the one opened with --expose-ports 8000 at provisioning + - name: VLLM_API_KEY + value: "$(params.vllm-api-key)" + - name: VLLM_MODEL + value: "$(params.model)" + image: registry.access.redhat.com/ubi9/ubi-minimal + script: | + set +e  # keep going so the exit code of pipeline-konflux.sh can be reported; setup commands below exit explicitly on failure + echo "[e2e] 1/8 Starting RHEL AI e2e tests" + echo "[e2e] 2/8 Installing deps..." + microdnf -y install git tar jq curl-minimal python3 gettext || exit 1 + echo "[e2e] 3/8 Downloading oc client..." + curl -fsSL -o oc.tar.gz https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/latest-4.19/openshift-client-linux-amd64-rhel9.tar.gz || exit 1 + tar -xzf oc.tar.gz && chmod +x kubectl oc && mv oc kubectl /usr/local/bin/ || exit 1 + echo "[e2e] 4/8 VLLM_URL=$VLLM_URL" + REPO_URL=$(jq -r '.components[] | select(.name == "lightspeed-stack") | .source.git.url // "https://github.com/lightspeed-core/lightspeed-stack.git"' <<< "$SNAPSHOT") + REPO_REV=$(jq -r '.components[] | select(.name == "lightspeed-stack") | .source.git.revision // "main"' <<< "$SNAPSHOT") + echo "[e2e] 5/8 Clone $REPO_URL @ $REPO_REV" + git clone -q "$REPO_URL" /workspace/lightspeed-stack || exit 1 + cd /workspace/lightspeed-stack && git fetch origin "$REPO_REV" && git checkout -q "$REPO_REV" || exit 1  # never test a revision other than the one under review + echo "[e2e] 6/8 Entering tests/e2e-prow/rhoai" + cd tests/e2e-prow/rhoai && chmod +x pipeline-konflux.sh || exit 1 + echo "[e2e] 7/8 Running pipeline-konflux.sh with RHEL AI configs..." + export LLAMA_STACK_CONFIG="$(cd ../../.. && pwd)/tests/e2e/configs/run-rhelai.yaml" + export LCS_CONFIG="$(cd ../../.. && pwd)/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml" + ./pipeline-konflux.sh + PIPELINE_EXIT=$? 
+ echo "[e2e] 8/8 pipeline-konflux.sh exited with code $PIPELINE_EXIT" + exit $PIPELINE_EXIT + + finally: + - name: destroy-rhelai + description: Tear down the RHEL AI instance regardless of pipeline outcome. + params: + - name: debug + value: "$(params.debug)" + taskSpec: + params: + - name: debug + volumes: + - name: aws-credentials + secret: + secretName: aws-mapt-credentials + - name: oidc-token + projected: + sources: + - serviceAccountToken: + audience: sts.amazonaws.com + expirationSeconds: 3600 + path: token + steps: + - name: destroy-instance + # TODO(are-ces): pin to v0.14.2 once released + image: quay.io/redhat-developer/mapt:v1.0.0-dev + imagePullPolicy: Always + env: + - name: HOME + value: /opt/mapt/run + - name: AWS_WEB_IDENTITY_TOKEN_FILE + value: /var/run/secrets/oidc/token + - name: AWS_ROLE_ARN + valueFrom: + secretKeyRef: + name: aws-mapt-credentials + key: AWS_ROLE_ARN + volumeMounts: + - name: aws-credentials + mountPath: /opt/aws-credentials + readOnly: true + - name: oidc-token + mountPath: /var/run/secrets/oidc + readOnly: true + resources: + requests: + cpu: "100m" + memory: "1Gi" + limits: + cpu: "300m" + memory: "2Gi" + script: | + #!/bin/sh + set -euo pipefail  # -e: fail loudly if the bucket name cannot be read instead of destroying against an empty URL + + export AWS_DEFAULT_REGION="us-east-1"  # NOTE(review): on-demand runs may have provisioned in another region; presumably the stored state records it — confirm + BUCKET=$(cat /opt/aws-credentials/S3_BUCKET) + RUN_ID="$(context.pipelineRun.name)"  # same project name and state URL that create-instance used + + echo "[mapt] Destroying RHEL AI instance (run: ${RUN_ID})..." + mapt aws rhel-ai destroy \ + --project-name "mapt-rhel-ai-${RUN_ID}" \ + --backed-url "s3://${BUCKET}/mapt/rhel-ai/${RUN_ID}" \ + --force-destroy \ + && echo "[mapt] Destroy completed." \ + || echo "[mapt] WARNING: destroy failed or found nothing to destroy; check AWS for leftover resources tagged project=lightspeed-core." 
diff --git a/examples/vllm-rhelai.yaml b/examples/vllm-rhelai.yaml index c363a8b7b..4fa263387 100644 --- a/examples/vllm-rhelai.yaml +++ b/examples/vllm-rhelai.yaml @@ -12,7 +12,7 @@ apis: - scoring - tool_runtime - vector_io - + benchmarks: [] datasets: [] # external_providers_dir: /opt/app-root/src/.llama/providers.d @@ -22,14 +22,15 @@ providers: - provider_id: vllm provider_type: remote::vllm config: - url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ - api_token: ${env.RHEL_AI_API_KEY} + base_url: ${env.VLLM_URL}/v1/  # VLLM_URL carries scheme, host and port (no separate port variable any more) + api_token: ${env.VLLM_API_KEY:=} tls_verify: false max_tokens: 2048 - provider_id: openai provider_type: remote::openai config: api_key: ${env.OPENAI_API_KEY} + allowed_models: ["gpt-4o-mini"] - config: {} provider_id: sentence-transformers provider_type: inline::sentence-transformers @@ -47,20 +48,24 @@ providers: provider_id: llama-guard provider_type: inline::llama-guard scoring: - - config: {} - provider_id: basic + - provider_id: basic provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********'  # masked placeholder, not a usable key tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime - vector_io: - - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - provider_id: faiss - provider_type: inline::faiss + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol + vector_io: []  # NOTE(review): the unchanged vector_stores.default_provider_id further down still names "faiss" — confirm that provider is registered elsewhere agents: - config: persistence: @@ -106,7 +111,7 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} sql_default: type: sql_sqlite db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} @@ -127,24 +132,31 @@ storage: backend: 
kv_default registered_resources: models: - - model_id: ${env.RHEL_AI_MODEL} - provider_id: vllm + - model_id: ${env.VLLM_MODEL} model_type: llm - provider_model_id: ${env.RHEL_AI_MODEL} + provider_id: vllm + provider_model_id: ${env.VLLM_MODEL} + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768  # same dimension as byok_rag.embedding_dimension in lightspeed-stack-rhelai.yaml shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini + vector_stores: [] datasets: [] scoring_fns: [] benchmarks: [] tool_groups: - - toolgroup_id: builtin::rag + - toolgroup_id: builtin::rag # Register the RAG tool provider_id: rag-runtime vector_stores: default_provider_id: faiss - default_embedding_model: + default_embedding_model: # Define the default embedding model for RAG provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 safety: default_shield_id: llama-guard diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml index c20efec8c..04fae492a 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml @@ -30,6 +30,12 @@ spec: optional: true - name: KV_RAG_PATH value: "/app-root/src/.llama/storage/rag/kv_store.db" + - name: VLLM_MODEL  # read by lightspeed-stack-rhelai.yaml as inference.default_model + valueFrom: + secretKeyRef: + name: vllm-model-secret + key: key + optional: true  # pipeline-konflux.sh creates this secret only when VLLM_URL is set image: ${LIGHTSPEED_STACK_IMAGE} ports: - containerPort: 8080 diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml index 4a4cb261e..80651a3cb 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml @@ -143,8 +143,27 @@ spec: secretKeyRef: name: openai-api-key-secret key: key + 
optional: true - name: E2E_OPENAI_MODEL value: "gpt-4o-mini" + - name: VLLM_URL  # the three VLLM_* secrets are created by pipeline-konflux.sh only when VLLM_URL is set, hence optional + valueFrom: + secretKeyRef: + name: vllm-url-secret + key: key + optional: true + - name: VLLM_API_KEY + valueFrom: + secretKeyRef: + name: vllm-api-key-secret + key: key + optional: true + - name: VLLM_MODEL + valueFrom: + secretKeyRef: + name: vllm-model-secret + key: key + optional: true - name: FAISS_VECTOR_STORE_ID valueFrom: secretKeyRef: diff --git a/tests/e2e-prow/rhoai/pipeline-konflux.sh b/tests/e2e-prow/rhoai/pipeline-konflux.sh index d6deb13bc..c20cbbac0 100755 --- a/tests/e2e-prow/rhoai/pipeline-konflux.sh +++ b/tests/e2e-prow/rhoai/pipeline-konflux.sh @@ -1,5 +1,6 @@ #!/bin/bash -# Konflux / OpenAI integration E2E: Llama Stack run-from-source + tests/e2e/configs/run-ci.yaml. +# Konflux integration E2E: Llama Stack run-from-source + configurable inference provider. +# Default: OpenAI (run-ci.yaml). For RHEL AI vLLM: set VLLM_URL (plus VLLM_API_KEY / VLLM_MODEL) and point LLAMA_STACK_CONFIG and LCS_CONFIG at the rhelai configs. # Prow (vLLM) workflow uses pipeline.sh unchanged. 
set -euo pipefail trap 'echo "❌ Pipeline failed at line $LINENO"; exit 1' ERR @@ -50,7 +51,10 @@ fi [[ -n "$QUAY_ROBOT_NAME" ]] && log "✅ QUAY_ROBOT_NAME is set" || { echo "❌ Missing QUAY_ROBOT_NAME"; exit 1; } [[ -n "$QUAY_ROBOT_PASSWORD" ]] && log "✅ QUAY_ROBOT_PASSWORD is set" || { echo "❌ Missing QUAY_ROBOT_PASSWORD"; exit 1; } -[[ -n "$OPENAI_API_KEY" ]] && log "✅ OPENAI_API_KEY is set" || { echo "❌ Missing OPENAI_API_KEY"; exit 1; } +[[ -n "${OPENAI_API_KEY:-}" ]] && log "✅ OPENAI_API_KEY is set" || { echo "❌ Missing OPENAI_API_KEY"; exit 1; }  # still mandatory in RHEL AI mode +if [[ -n "${VLLM_URL:-}" ]]; then  # a non-empty VLLM_URL is what switches the script into RHEL AI mode + log "✅ VLLM_URL is set: $VLLM_URL (RHEL AI mode)" +fi # Basic info (skip when QUIET to keep Konflux UI focused on test logs) if [ "$QUIET" != "1" ]; then ls -A || true; oc version; oc whoami; fi @@ -68,6 +72,11 @@ create_secret() { } create_secret openai-api-key-secret --from-literal=key="$OPENAI_API_KEY" +if [[ -n "${VLLM_URL:-}" ]]; then + create_secret vllm-url-secret --from-literal=key="$VLLM_URL" + create_secret vllm-api-key-secret --from-literal=key="${VLLM_API_KEY:-}" + create_secret vllm-model-secret --from-literal=key="${VLLM_MODEL:-meta-llama/Llama-3.2-1B-Instruct}"  # NOTE(review): fallback differs from the Tekton pipeline default meta-llama/Llama-3.1-8B-Instruct — confirm intended +fi # MCPFileAuth E2E: secret mounted at /tmp/mcp-token in LCS pod (same as docker-compose) if [ -f "$REPO_ROOT/tests/e2e/secrets/mcp-token" ]; then @@ -162,13 +171,16 @@ spec: EOF log "✅ llama-stack-app-root PVC created" -# Llama run config: single source with GitHub E2E (tests/e2e/configs/run-ci.yaml). -# Lightspeed stack: same tree as local/docker E2E (tests/e2e/configuration/server-mode). +# Configurable config paths: default to OpenAI, override for RHEL AI / vLLM. 
+LLAMA_STACK_CONFIG="${LLAMA_STACK_CONFIG:-$REPO_ROOT/tests/e2e/configs/run-ci.yaml}"  # a value exported by the caller wins; the RHEL AI Tekton step exports both variables +LCS_CONFIG="${LCS_CONFIG:-$REPO_ROOT/tests/e2e/configuration/server-mode/lightspeed-stack.yaml}" +log "Llama Stack config: $LLAMA_STACK_CONFIG" +log "LCS config: $LCS_CONFIG" oc create configmap llama-stack-config -n "$NAMESPACE" \ - --from-file=run.yaml="$REPO_ROOT/tests/e2e/configs/run-ci.yaml" \ + --from-file=run.yaml="$LLAMA_STACK_CONFIG" \ --dry-run=client -o yaml | oc apply -f - oc create configmap lightspeed-stack-config -n "$NAMESPACE" \ - --from-file=lightspeed-stack.yaml="$REPO_ROOT/tests/e2e/configuration/server-mode/lightspeed-stack.yaml" \ + --from-file=lightspeed-stack.yaml="$LCS_CONFIG" \ --dry-run=client -o yaml | oc apply -f - # Create RAG data ConfigMap from the e2e test RAG data @@ -370,8 +382,13 @@ export E2E_LLAMA_PORT="8321" # Same pattern as tests/e2e-prow/rhoai/pipeline.sh and .github/workflows/e2e_tests_*.yaml: # Behave {MODEL}/{PROVIDER} use these when set; avoids wrong fallbacks if /v1/models # discovery in before_all is empty (matches run-ci.yaml openai + E2E_OPENAI_MODEL). 
-: "${E2E_DEFAULT_PROVIDER_OVERRIDE:=openai}" -: "${E2E_DEFAULT_MODEL_OVERRIDE:=${E2E_OPENAI_MODEL:-gpt-4o-mini}}" +if [[ -n "${VLLM_URL:-}" ]]; then  # RHEL AI mode: default the Behave provider/model to vLLM + : "${E2E_DEFAULT_PROVIDER_OVERRIDE:=vllm}"  # ":=" only assigns when the variable is unset or empty, so a caller-supplied value wins + : "${E2E_DEFAULT_MODEL_OVERRIDE:=${VLLM_MODEL:-meta-llama/Llama-3.2-1B-Instruct}}" +else + : "${E2E_DEFAULT_PROVIDER_OVERRIDE:=openai}" + : "${E2E_DEFAULT_MODEL_OVERRIDE:=${E2E_OPENAI_MODEL:-gpt-4o-mini}}" +fi export E2E_DEFAULT_PROVIDER_OVERRIDE E2E_DEFAULT_MODEL_OVERRIDE log "LCS accessible at: http://$E2E_LSC_HOSTNAME:8080" log "Mock JWKS accessible at: http://$E2E_JWKS_HOSTNAME:8000" diff --git a/tests/e2e/configs/run-rhelai.yaml b/tests/e2e/configs/run-rhelai.yaml index 0edbb4d76..4fa263387 100644 --- a/tests/e2e/configs/run-rhelai.yaml +++ b/tests/e2e/configs/run-rhelai.yaml @@ -22,15 +22,15 @@ providers: - provider_id: vllm provider_type: remote::vllm config: - url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ - api_token: ${env.RHEL_AI_API_KEY} + base_url: ${env.VLLM_URL}/v1/  # VLLM_URL carries scheme, host and port + api_token: ${env.VLLM_API_KEY:=} tls_verify: false max_tokens: 2048 - provider_id: openai provider_type: remote::openai config: api_key: ${env.OPENAI_API_KEY} - allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + allowed_models: ["gpt-4o-mini"]  # NOTE(review): no longer follows E2E_OPENAI_MODEL — confirm the hard-coded model is intended - config: {} provider_id: sentence-transformers provider_type: inline::sentence-transformers @@ -132,10 +132,10 @@ storage: backend: kv_default registered_resources: models: - - model_id: ${env.RHEL_AI_MODEL} - provider_id: vllm + - model_id: ${env.VLLM_MODEL} model_type: llm - provider_model_id: ${env.RHEL_AI_MODEL} + provider_id: vllm + provider_model_id: ${env.VLLM_MODEL} - model_id: all-mpnet-base-v2 model_type: embedding provider_id: sentence-transformers diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml new file mode 100644 index 000000000..4313c7605 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-rhelai.yaml @@ 
-0,0 +1,35 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://${env.E2E_LLAMA_HOSTNAME}:8321  # same port as E2E_LLAMA_PORT exported by pipeline-konflux.sh + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: vllm  # matches provider_id "vllm" in tests/e2e/configs/run-rhelai.yaml + default_model: ${env.VLLM_MODEL}  # injected into the LCS pod from vllm-model-secret +byok_rag: + - rag_id: e2e-test-docs + rag_type: inline::faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + +rag: + tool: + - e2e-test-docs  # refers to the byok_rag entry with rag_id e2e-test-docs above