diff --git a/changelog/unreleased/PR#4419-language-model-test-refactoring.yml b/changelog/unreleased/PR#4419-language-model-test-refactoring.yml new file mode 100644 index 000000000000..f74147d15203 --- /dev/null +++ b/changelog/unreleased/PR#4419-language-model-test-refactoring.yml @@ -0,0 +1,9 @@ +title: Refactor the tests of the language-model module to accommodate new upcoming LLM oriented features avoiding code duplication. +type: fixed +authors: + - name: Nicolò Rinaldi + - name: Anna Ruggero + - name: Alessandro Benedetti +links: + - name: PR#4419 + url: https://github.com/apache/solr/pull/4419 diff --git a/changelog/unreleased/SOLR-18187-llm-document-enrichment.yml b/changelog/unreleased/SOLR-18187-llm-document-enrichment.yml new file mode 100644 index 000000000000..dc8566985145 --- /dev/null +++ b/changelog/unreleased/SOLR-18187-llm-document-enrichment.yml @@ -0,0 +1,7 @@ +title: Add DocumentEnrichmentUpdateProcessorFactory for LLM-based document enrichment at index time +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: +- name: Nicolò Rinaldi, Anna Ruggero, Alessandro Benedetti +links: + - name: SOLR-18187 + url: https://issues.apache.org/jira/browse/SOLR-18187 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 7018c17a31ec..091e7e3fc5ff 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -240,8 +240,8 @@ amazon-awssdk-s3 = { module = "software.amazon.awssdk:s3", version.ref = "amazon amazon-awssdk-sdkcore = { module = "software.amazon.awssdk:sdk-core", version.ref = "amazon-awssdk" } amazon-awssdk-sts = { module = "software.amazon.awssdk:sts", version.ref = "amazon-awssdk" } androidx-lifecycle-runtimeCompose = { module = "org.jetbrains.androidx.lifecycle:lifecycle-runtime-compose", version.ref = "androidx-lifecycle" } -androidx-lifecycle-viewmodelCompose = { module = "org.jetbrains.androidx.lifecycle:lifecycle-viewmodel-compose", version.ref = 
"androidx-lifecycle" } androidx-lifecycle-viewModelNav3 = { module = "org.jetbrains.androidx.lifecycle:lifecycle-viewmodel-navigation3", version.ref = "androidx-lifecycle" } +androidx-lifecycle-viewmodelCompose = { module = "org.jetbrains.androidx.lifecycle:lifecycle-viewmodel-compose", version.ref = "androidx-lifecycle" } androidx-material3-adaptive = { module = "org.jetbrains.compose.material3.adaptive:adaptive", version.ref = "androidx-adaptive" } androidx-material3-adaptive-nav3 = { module = "org.jetbrains.compose.material3.adaptive:adaptive-navigation3", version.ref = "androidx-adaptive" } androidx-navigation3-ui = { module = "org.jetbrains.androidx.navigation3:navigation3-ui", version.ref = "androidx-navigation3" } @@ -459,11 +459,14 @@ ktor-client-serialization-json = { module = "io.ktor:ktor-serialization-kotlinx- ktor-server-cio = { module = "io.ktor:ktor-server-cio" } ktor-server-core = { module = "io.ktor:ktor-server-core" } ktor-server-htmlBuilder = { module = "io.ktor:ktor-server-html-builder" } +langchain4j-anthropic = { module = "dev.langchain4j:langchain4j-anthropic" } langchain4j-bom = { module = "dev.langchain4j:langchain4j-bom", version.ref = "langchain4j-bom" } langchain4j-cohere = { module = "dev.langchain4j:langchain4j-cohere" } langchain4j-core = { module = "dev.langchain4j:langchain4j-core" } +langchain4j-google-ai-gemini = { module = "dev.langchain4j:langchain4j-google-ai-gemini" } langchain4j-hugging-face = { module = "dev.langchain4j:langchain4j-hugging-face" } langchain4j-mistral-ai = { module = "dev.langchain4j:langchain4j-mistral-ai" } +langchain4j-ollama = { module = "dev.langchain4j:langchain4j-ollama" } langchain4j-open-ai = { module = "dev.langchain4j:langchain4j-open-ai" } lmax-disruptor = { module = "com.lmax:disruptor", version.ref = "lmax-disruptor" } locationtech-spatial4j = { module = "org.locationtech.spatial4j:spatial4j", version.ref = "spatial4j" } diff --git a/solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1 
b/solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1 new file mode 100644 index 000000000000..83fcaa412e36 --- /dev/null +++ b/solr/licenses/langchain4j-anthropic-1.9.1.jar.sha1 @@ -0,0 +1 @@ +905f570cd38c2ebd94fa159f2e88bca06bbf71c8 diff --git a/solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1 b/solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1 new file mode 100644 index 000000000000..41991a7dac6f --- /dev/null +++ b/solr/licenses/langchain4j-google-ai-gemini-1.9.1.jar.sha1 @@ -0,0 +1 @@ +49973974543318de23f3f09b21c79d5e45815d8c diff --git a/solr/licenses/langchain4j-ollama-1.9.1.jar.sha1 b/solr/licenses/langchain4j-ollama-1.9.1.jar.sha1 new file mode 100644 index 000000000000..8a1828b24320 --- /dev/null +++ b/solr/licenses/langchain4j-ollama-1.9.1.jar.sha1 @@ -0,0 +1 @@ +e3c631fa2fce6e79ad50fc86adb724656992263a diff --git a/solr/modules/language-models/build.gradle b/solr/modules/language-models/build.gradle index a4dc82fc15cb..17d9716cfd20 100644 --- a/solr/modules/language-models/build.gradle +++ b/solr/modules/language-models/build.gradle @@ -29,9 +29,12 @@ dependencies { implementation libs.apache.lucene.core implementation libs.langchain4j.core + runtimeOnly libs.langchain4j.anthropic runtimeOnly libs.langchain4j.cohere + runtimeOnly libs.langchain4j.google.ai.gemini runtimeOnly libs.langchain4j.hugging.face runtimeOnly libs.langchain4j.mistral.ai + runtimeOnly libs.langchain4j.ollama runtimeOnly libs.langchain4j.open.ai implementation libs.slf4j.api diff --git a/solr/modules/language-models/gradle.lockfile b/solr/modules/language-models/gradle.lockfile index d2cdfd08dd84..99cad6670d11 100644 --- a/solr/modules/language-models/gradle.lockfile +++ b/solr/modules/language-models/gradle.lockfile @@ -41,13 +41,16 @@ com.tdunning:t-digest:3.3=jarValidation,runtimeClasspath,runtimeLibs,solrPlatfor commons-cli:commons-cli:1.11.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
commons-codec:commons-codec:1.21.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath commons-io:commons-io:2.21.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +dev.langchain4j:langchain4j-anthropic:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-bom:1.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath dev.langchain4j:langchain4j-cohere:1.9.1-beta17=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-core:1.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +dev.langchain4j:langchain4j-google-ai-gemini:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-http-client-jdk:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-http-client:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-hugging-face:1.9.1-beta17=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-mistral-ai:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath +dev.langchain4j:langchain4j-ollama:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-open-ai:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath io.dropwizard.metrics:metrics-annotation:4.2.33=jarValidation,testRuntimeClasspath io.dropwizard.metrics:metrics-core:4.2.33=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrLargeLanguageModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrLargeLanguageModel.java new file mode 100644 index 
000000000000..93c3ad62328a --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrLargeLanguageModel.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.model; + +import dev.langchain4j.data.message.UserMessage; +import dev.langchain4j.model.chat.ChatModel; +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.request.ResponseFormat; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Method; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.LanguageModelException; +import org.apache.solr.languagemodels.store.rest.LargeLanguageModelStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This object wraps a {@link dev.langchain4j.model.chat.ChatModel} to some content given a prompt + * and a {@link ResponseFormat}. 
It's meant to be used as a managed resource with the {@link + * LargeLanguageModelStore} + */ +public class SolrLargeLanguageModel extends SolrLanguageModel implements Accountable { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(SolrLargeLanguageModel.class); + + private static final String THINKING_BUDGET_TOKENS = "thinkingBudgetTokens"; + private static final String RANDOM_SEED = "randomSeed"; + + private final ChatModel chatModel; + private final int hashCode; + + public static SolrLargeLanguageModel getInstance( + SolrResourceLoader solrResourceLoader, + String className, + String name, + Map params) + throws LanguageModelException { + try { + /* + * The idea here is to build a {@link dev.langchain4j.model.chat.ChatModel} using inversion + * of control. + * Each model has its own list of parameters we don't know beforehand, but each {@link dev.langchain4j.model.chat.ChatModel} class + * has its own builder that uses setters with the same name of the parameter in input. + * */ + ChatModel chatModel; + Class modelClass = solrResourceLoader.findClass(className, ChatModel.class); + var builder = modelClass.getMethod("builder").invoke(null); + if (params != null) { + /* + * This block of code has the responsibility of instantiate a {@link + * dev.langchain4j.model.chat.ChatModel} using the params provided. Classes have + * params of the specific implementation of {@link + * dev.langchain4j.model.chat.ChatModel}, which is not known beforehand. So we benefit of + * the design choice in langchain4j that each subclass implementing {@link + * dev.langchain4j.model.chat.ChatModel} uses setters with the same name of the + * param. + */ + for (String paramName : params.keySet()) { + /* + * When a param is not primitive, we need to instantiate the object explicitly and then call the + * setter method. + * N.B. 
when adding support to new models, pay attention to all the parameters they + * support, some of them may require to be handled in here as separate switch cases + */ + switch (paramName) { + case TIMEOUT_PARAM -> builder + .getClass() + .getMethod(paramName, Duration.class) + .invoke(builder, Duration.ofSeconds((Long) params.get(paramName))); + + case MAX_RETRIES_PARAM, THINKING_BUDGET_TOKENS, RANDOM_SEED -> builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + + /* + * For primitive params if there's only one setter available, we call it. + * If there's choice we default to the string one + */ + default -> { + ArrayList paramNameMatches = new ArrayList<>(); + for (var method : builder.getClass().getMethods()) { + if (paramName.equals(method.getName()) && method.getParameterCount() == 1) { + paramNameMatches.add(method); + } + } + if (paramNameMatches.size() == 1) { + paramNameMatches.getFirst().invoke(builder, params.get(paramName)); + } else { + try { + builder + .getClass() + .getMethod(paramName, String.class) + .invoke(builder, params.get(paramName).toString()); + } catch (NoSuchMethodException e) { + log.error("Parameter {} not supported by model {}", paramName, className); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e); + } + } + } + } + } + } + + // Always enforce strict schema adherence where supported. 
For Anthropic and Google it's + // enabled by default + if (!"dev.langchain4j.model.anthropic.AnthropicChatModel".equals(className) + && !"dev.langchain4j.model.googleai.GoogleAiGeminiChatModel".equals(className)) { + try { + builder.getClass().getMethod("strictJsonSchema", Boolean.class).invoke(builder, true); + } catch (NoSuchMethodException ignored) { + log.debug( + "Model {} does not have strictJsonSchema param, structured output is not enforced", + className); + } + } + chatModel = (ChatModel) builder.getClass().getMethod("build").invoke(builder); + return new SolrLargeLanguageModel(name, chatModel, params); + } catch (final Exception e) { + throw new LanguageModelException("Model loading failed for " + className, e); + } + } + + public SolrLargeLanguageModel(String name, ChatModel chatModel, Map params) { + super(name, params); + this.chatModel = chatModel; + this.hashCode = calculateHashCode(); + } + + /** + * Sends a structured chat request to the language model and returns the raw text response. 
+ * + * @param prompt the user prompt to send to the language model + * @param responseFormat the format specification that instructs the model to produce structured + * JSON output + * @return the raw text response from the language model + */ + public String generate(String prompt, ResponseFormat responseFormat) { + ChatRequest chatRequest = + ChatRequest.builder() + .responseFormat(responseFormat) + .messages(UserMessage.from(prompt)) + .build(); + return chatModel.chat(chatRequest).aiMessage().text(); + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(name=" + getName() + ")"; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + + RamUsageEstimator.sizeOfObject(name) + + RamUsageEstimator.sizeOfObject(chatModel); + } + + @Override + public int hashCode() { + return hashCode; + } + + private int calculateHashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Objects.hashCode(name); + result = (prime * result) + Objects.hashCode(chatModel); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof SolrLargeLanguageModel)) return false; + final SolrLargeLanguageModel other = (SolrLargeLanguageModel) obj; + return Objects.equals(chatModel, other.chatModel) && Objects.equals(name, other.name); + } + + @Override + public String getModelClassName() { + return chatModel.getClass().getName(); + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java index cb34d7b621b0..9b171d2e51de 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java @@ -29,13 +29,13 @@ import 
org.apache.solr.common.SolrException; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.languagemodels.LanguageModelException; -import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.TextToVectorModelStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * This object wraps a {@link EmbeddingModel} to encode text to vector. It's meant to be used as a - * managed resource with the {@link ManagedTextToVectorModelStore} + * managed resource with the {@link TextToVectorModelStore} */ public class SolrTextToVectorModel extends SolrLanguageModel implements Accountable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java index 9cbfbf5dc1f4..01d7b0c05541 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java @@ -27,7 +27,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.languagemodels.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.TextToVectorModelStore; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceObserver; @@ -44,7 +44,7 @@ public class TextToVectorQParserPlugin extends QParserPlugin implements ResourceLoaderAware, ManagedResourceObserver { public static final String EMBEDDING_MODEL_PARAM = "model"; - private ManagedTextToVectorModelStore 
modelStore = null; + private TextToVectorModelStore modelStore = null; @Override public QParser createParser( @@ -55,14 +55,14 @@ public QParser createParser( @Override public void inform(ResourceLoader loader) throws IOException { final SolrResourceLoader solrResourceLoader = (SolrResourceLoader) loader; - ManagedTextToVectorModelStore.registerManagedTextToVectorModelStore(solrResourceLoader, this); + TextToVectorModelStore.registerManagedTextToVectorModelStore(solrResourceLoader, this); } @Override public void onManagedResourceInitialized(NamedList args, ManagedResource res) throws SolrException { - if (res instanceof ManagedTextToVectorModelStore) { - modelStore = (ManagedTextToVectorModelStore) res; + if (res instanceof TextToVectorModelStore) { + modelStore = (TextToVectorModelStore) res; } if (modelStore != null) { modelStore.loadStoredModels(); @@ -105,7 +105,7 @@ public Query parse() throws SyntaxError { "The model requested '" + embeddingModelName + "' can't be found in the store: " - + ManagedTextToVectorModelStore.REST_END_POINT); + + TextToVectorModelStore.REST_END_POINT); } } } diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/LargeLanguageModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/LargeLanguageModelStore.java new file mode 100644 index 000000000000..7a4713e3059a --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/LargeLanguageModelStore.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.store.rest; + +import java.util.Map; +import net.jcip.annotations.ThreadSafe; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.model.SolrLargeLanguageModel; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage; + +/** Managed Resource wrapper for the large language model store, exposed via REST */ +@ThreadSafe +public class LargeLanguageModelStore extends ManagedLanguageModelStore { + /** the model store rest endpoint */ + public static final String REST_END_POINT = "/schema/large-language-model-store"; + + public static void registerManagedLargeLanguageModelStore( + SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { + solrResourceLoader + .getManagedResourceRegistry() + .registerManagedResource( + REST_END_POINT, LargeLanguageModelStore.class, managedResourceObserver); + } + + public static LargeLanguageModelStore getManagedModelStore(SolrCore core) { + return (LargeLanguageModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + } + + @Override + @SuppressWarnings("unchecked") + protected SolrLargeLanguageModel fromModelMap( + SolrResourceLoader solrResourceLoader, Map modelMap) { + return SolrLargeLanguageModel.getInstance( + solrResourceLoader, + (String) modelMap.get(CLASS_KEY), + (String) modelMap.get(NAME_KEY), + (Map) modelMap.get(PARAMS_KEY)); + } + + public 
LargeLanguageModelStore( + String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedTextToVectorModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/TextToVectorModelStore.java similarity index 84% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedTextToVectorModelStore.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/TextToVectorModelStore.java index d0f364062e7a..f9136a0f7242 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedTextToVectorModelStore.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/TextToVectorModelStore.java @@ -27,8 +27,7 @@ /** Managed Resource wrapper for the text-to-vector model store, exposed via REST */ @ThreadSafe -public class ManagedTextToVectorModelStore - extends ManagedLanguageModelStore { +public class TextToVectorModelStore extends ManagedLanguageModelStore { /** the model store rest endpoint */ public static final String REST_END_POINT = "/schema/text-to-vector-model-store"; @@ -38,11 +37,11 @@ public static void registerManagedTextToVectorModelStore( solrResourceLoader .getManagedResourceRegistry() .registerManagedResource( - REST_END_POINT, ManagedTextToVectorModelStore.class, managedResourceObserver); + REST_END_POINT, TextToVectorModelStore.class, managedResourceObserver); } - public static ManagedTextToVectorModelStore getManagedModelStore(SolrCore core) { - return (ManagedTextToVectorModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + public static TextToVectorModelStore getManagedModelStore(SolrCore core) { + return (TextToVectorModelStore) 
core.getRestManager().getManagedResource(REST_END_POINT); } @Override @@ -56,7 +55,7 @@ protected SolrTextToVectorModel fromModelMap( (Map) textToVectorModel.get(PARAMS_KEY)); } - public ManagedTextToVectorModelStore( + public TextToVectorModelStore( String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) throws SolrException { super(resourceId, loader, storageIO); diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/DocumentEnrichmentUpdateProcessor.java new file mode 100644 index 000000000000..1359a504a3c5 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.languagemodels.update.processor; + +import dev.langchain4j.model.chat.request.ResponseFormat; +import dev.langchain4j.model.chat.request.json.JsonArraySchema; +import dev.langchain4j.model.chat.request.json.JsonBooleanSchema; +import dev.langchain4j.model.chat.request.json.JsonIntegerSchema; +import dev.langchain4j.model.chat.request.json.JsonNumberSchema; +import dev.langchain4j.model.chat.request.json.JsonObjectSchema; +import dev.langchain4j.model.chat.request.json.JsonSchemaElement; +import dev.langchain4j.model.chat.request.json.JsonStringSchema; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.util.Utils; +import org.apache.solr.languagemodels.model.SolrLargeLanguageModel; +import org.apache.solr.languagemodels.update.processor.factory.DocumentEnrichmentUpdateProcessorFactory; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final IndexSchema schema; + private final List inputFields; + private final String outputField; + private final String prompt; + private final SolrLargeLanguageModel largeLanguageModel; + private final boolean multiValued; + private final ResponseFormat responseFormat; + + public DocumentEnrichmentUpdateProcessor( + List inputFields, + String outputField, + String prompt, + SolrLargeLanguageModel largeLanguageModel, + 
boolean multiValued, + ResponseFormat responseFormat, + SolrQueryRequest req, + UpdateRequestProcessor next) { + super(next); + this.schema = req.getSchema(); + this.inputFields = inputFields; + this.outputField = outputField; + this.prompt = prompt; + this.largeLanguageModel = largeLanguageModel; + this.multiValued = multiValued; + this.responseFormat = responseFormat; + } + + /** + * @param cmd the update command in input containing the Document to process + * @throws IOException If there is a low-level I/O error + */ + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + SolrInputDocument doc = cmd.getSolrInputDocument(); + + // Collect all field values; skip enrichment if any declared field is null or empty + String injectedPrompt = prompt; + for (String fieldName : inputFields) { + SolrInputField field = doc.get(fieldName); + if (isNullOrEmpty(field)) { + super.processAdd(cmd); + return; + } + injectedPrompt = injectedPrompt.replace("{" + fieldName + "}", field.getValue().toString()); + } + + try { + // as for now, only a plain text as prompt is sent to the model (no support for + // tools/skills/agents) + // chatModel.chat returns the parsed value from the structured JSON response + Object returnValue = + Utils.fromJSONString(largeLanguageModel.generate(injectedPrompt, responseFormat)); + // Guardrails for OllamaChatModel, since it doesn't support strict JSON structured output mode + if (!(returnValue instanceof Map map) + || !map.containsKey(DocumentEnrichmentUpdateProcessorFactory.JSON_FIELD_PROPERTY)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "LLM was not able to format the response correctly: " + returnValue); + } + Object generatedFieldValue = + map.get(DocumentEnrichmentUpdateProcessorFactory.JSON_FIELD_PROPERTY); + JsonSchemaElement valueSchema = + ((JsonObjectSchema) responseFormat.jsonSchema().rootElement()) + .properties() + .get(DocumentEnrichmentUpdateProcessorFactory.JSON_FIELD_PROPERTY); + 
boolean typeOk = + switch (valueSchema) { + case JsonStringSchema ignored -> generatedFieldValue instanceof String; + case JsonIntegerSchema ignored -> generatedFieldValue instanceof Integer + || generatedFieldValue instanceof Long; + case JsonNumberSchema ignored -> generatedFieldValue instanceof Number; + case JsonBooleanSchema ignored -> generatedFieldValue instanceof Boolean; + case JsonArraySchema ignored -> generatedFieldValue instanceof List; + default -> true; + }; + if (!typeOk) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "LLM returned wrong value type: expected " + + valueSchema.getClass().getSimpleName() + + " but got " + + generatedFieldValue.getClass().getSimpleName()); + } + if (multiValued && generatedFieldValue instanceof List generatedFieldValueList) { + for (Object item : generatedFieldValueList) { + doc.addField(outputField, item); + } + } else { + doc.setField(outputField, generatedFieldValue); + } + } catch (RuntimeException largeLanguageModelFailure) { + if (log.isErrorEnabled()) { + SchemaField uniqueKeyField = schema.getUniqueKeyField(); + String uniqueKeyFieldName = uniqueKeyField.getName(); + log.error( + "Could not process fields {} for the document with {}: {}", + inputFields, + uniqueKeyFieldName, + doc.getFieldValue(uniqueKeyFieldName), + largeLanguageModelFailure); + } + } + super.processAdd(cmd); + } + + protected boolean isNullOrEmpty(SolrInputField inputFieldContent) { + return (inputFieldContent == null + || inputFieldContent.getValue() == null + || inputFieldContent.getValue().toString().isEmpty()); + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactory.java new file mode 100644 index 000000000000..a65e1306ecbd --- /dev/null +++ 
b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactory.java @@ -0,0 +1,333 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.languagemodels.update.processor.factory; + +import dev.langchain4j.model.chat.request.ResponseFormat; +import dev.langchain4j.model.chat.request.ResponseFormatType; +import dev.langchain4j.model.chat.request.json.JsonArraySchema; +import dev.langchain4j.model.chat.request.json.JsonBooleanSchema; +import dev.langchain4j.model.chat.request.json.JsonIntegerSchema; +import dev.langchain4j.model.chat.request.json.JsonNumberSchema; +import dev.langchain4j.model.chat.request.json.JsonObjectSchema; +import dev.langchain4j.model.chat.request.json.JsonSchema; +import dev.langchain4j.model.chat.request.json.JsonSchemaElement; +import dev.langchain4j.model.chat.request.json.JsonStringSchema; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import 
org.apache.solr.common.SolrException; +import org.apache.solr.common.params.RequiredSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.model.SolrLargeLanguageModel; +import org.apache.solr.languagemodels.store.rest.LargeLanguageModelStore; +import org.apache.solr.languagemodels.update.processor.DocumentEnrichmentUpdateProcessor; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DatePointField; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.DoublePointField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.FloatPointField; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.IntPointField; +import org.apache.solr.schema.LongPointField; +import org.apache.solr.schema.NestPathField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.StrField; +import org.apache.solr.schema.TextField; +import org.apache.solr.schema.UUIDField; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.solr.update.processor.UpdateRequestProcessorFactory; +import org.apache.solr.util.plugin.SolrCoreAware; + +/** + * Generate the content of {@code outputField} based on other fields specified as {@code + * inputField}s. + * + *

The following validation rules are applied: + * + *

    + *
  • At least one {@code inputField} must be declared. + *
  • Exactly one of {@code prompt} or {@code promptFile} must be provided. + *
  • Every declared {@code inputField} must have a corresponding {@code {fieldName}} placeholder + * in the prompt. + *
  • Every {@code {placeholder}} in the prompt must correspond to a declared {@code inputField}. + *
  • One and only one {@code outputField} is allowed. + *
+ * + *
+ * <processor class="solr.languagemodels.update.processor.factory.DocumentEnrichmentUpdateProcessorFactory">
+ *   <str name="inputField">title_field</str>
+ *   <str name="inputField">body_field</str>
+ *   <str name="outputField">enriched_field</str>
+ *   <str name="prompt">Title: {title_field}. Body: {body_field}.</str> // or <str name="promptFile">prompt.txt</str>
+ *   <str name="model">model-name</str>
+ * </processor>
+ * 
+ * + *

Multiple {@code inputField} values can also be declared as an array using {@code arr}: + * + *

+ * <arr name="inputField">
+ *   <str>title_field</str>
+ *   <str>body_field</str>
+ * </arr>
+ * 
+ */ +public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory + implements SolrCoreAware, ManagedResourceObserver { + private static final String INPUT_FIELD_PARAM = "inputField"; + private static final String OUTPUT_FIELD_PARAM = "outputField"; + private static final String PROMPT = "prompt"; + private static final String PROMPT_FILE = "promptFile"; + private static final String MODEL_NAME = "model"; + private static final String JSON_SCHEMA_NAME = "output"; + public static final String JSON_FIELD_PROPERTY = "value"; + private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{([^}]+)\\}"); + + private List inputFields; + private String outputField; + private String promptText; + private String promptFile; + private String modelName; + + @Override + public void init(final NamedList args) { + // removeConfigArgs handles both multiple and + // and must be called before toSolrParams() since it mutates args in place + Collection fieldNames = args.removeConfigArgs(INPUT_FIELD_PARAM); + if (fieldNames.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "At least one 'inputField' must be provided"); + } + inputFields = List.copyOf(fieldNames); + + Collection outputFields = args.removeConfigArgs(OUTPUT_FIELD_PARAM); + if (outputFields.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "Exactly one 'outputField' must be provided"); + } + if (outputFields.size() > 1) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Only one 'outputField' can be provided, but found: " + outputFields); + } + outputField = outputFields.iterator().next(); + + SolrParams params = args.toSolrParams(); + RequiredSolrParams required = params.required(); + modelName = required.get(MODEL_NAME); + + String inlinePrompt = params.get(PROMPT); + String promptFilePath = params.get(PROMPT_FILE); + + if (inlinePrompt == null && promptFilePath == null) { + throw new SolrException( + 
SolrException.ErrorCode.SERVER_ERROR, "Either 'prompt' or 'promptFile' must be provided"); + } + if (inlinePrompt != null && promptFilePath != null) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Only one of 'prompt' or 'promptFile' can be provided, not both"); + } + if (inlinePrompt != null) { + validatePromptPlaceholders(inlinePrompt, inputFields); + this.promptText = inlinePrompt; + } + this.promptFile = promptFilePath; + } + + @Override + public void inform(SolrCore core) { + final SolrResourceLoader solrResourceLoader = core.getResourceLoader(); + LargeLanguageModelStore.registerManagedLargeLanguageModelStore(solrResourceLoader, this); + if (promptFile != null) { + try (InputStream is = solrResourceLoader.openResource(promptFile)) { + promptText = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); + } catch (IOException e) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "Cannot read prompt file: " + promptFile, e); + } + validatePromptPlaceholders(promptText, inputFields); + } + } + + @Override + public void onManagedResourceInitialized(NamedList args, ManagedResource res) + throws SolrException { + if (res instanceof LargeLanguageModelStore store) { + store.loadStoredModels(); + } + } + + @Override + public UpdateRequestProcessor getInstance( + SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + IndexSchema latestSchema = req.getCore().getLatestSchema(); + + for (String fieldName : inputFields) { + if (!latestSchema.isDynamicField(fieldName) && !latestSchema.hasExplicitField(fieldName)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + fieldName + "\""); + } + } + + final SchemaField outputFieldSchema = latestSchema.getField(outputField); + + ResponseFormat responseFormat = getJsonSchema(outputFieldSchema); + boolean multiValued = outputFieldSchema.multiValued(); + + LargeLanguageModelStore store = 
LargeLanguageModelStore.getManagedModelStore(req.getCore()); + SolrLargeLanguageModel fieldGenerationModel = store.getModel(modelName); + if (fieldGenerationModel == null) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "The model configured in the Update Request Processor '" + + modelName + + "' can't be found in the store: " + + LargeLanguageModelStore.REST_END_POINT); + } + + return new DocumentEnrichmentUpdateProcessor( + inputFields, + outputField, + promptText, + fieldGenerationModel, + multiValued, + responseFormat, + req, + next); + } + + /** + * Builds a {@link ResponseFormat} that instructs the model to return a JSON object {@code + * {"value": ...}} whose value type matches the Solr field type. For multivalued fields the value + * is wrapped in a {@link JsonArraySchema} nested inside the root {@link JsonObjectSchema}. + * + *

Nesting {@link JsonArraySchema} inside a {@link JsonObjectSchema} property is supported by + * all langchain4j providers that implement structured outputs with {@link JsonObjectSchema} + * (OpenAI, Azure OpenAI, Google AI, Gemini, Mistral, Ollama, Amazon Bedrock, Watsonx). + */ + static ResponseFormat getJsonSchema(SchemaField schemaField) { + JsonSchemaElement valueElement = toJsonSchemaElement(schemaField.getType()); + JsonSchemaElement valueSchema = + schemaField.multiValued() + ? JsonArraySchema.builder().items(valueElement).build() + : valueElement; + return ResponseFormat.builder() + .type(ResponseFormatType.JSON) + .jsonSchema( + JsonSchema.builder() + .name(JSON_SCHEMA_NAME) + .rootElement( + JsonObjectSchema.builder() + .addProperty(JSON_FIELD_PROPERTY, valueSchema) + .required(JSON_FIELD_PROPERTY) + .build()) + .build()) + .build(); + } + + private static JsonSchemaElement toJsonSchemaElement(FieldType fieldType) { + SolrException unsupportedFieldTypeException = + new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "field type is not supported by Document Enrichment: " + + fieldType.getClass().getSimpleName()); + + return switch (fieldType) { + // first check unsupported types and throw SolrException + case DenseVectorField f -> throw unsupportedFieldTypeException; + case UUIDField f -> throw unsupportedFieldTypeException; + case NestPathField f -> throw unsupportedFieldTypeException; + + // build JsonSchemaElement for supported types + case StrField f -> new JsonStringSchema(); + case TextField f -> new JsonStringSchema(); + case DatePointField f -> new JsonStringSchema(); + + case IntPointField f -> new JsonIntegerSchema(); + case LongPointField f -> new JsonIntegerSchema(); + + case FloatPointField f -> new JsonNumberSchema(); + case DoublePointField f -> new JsonNumberSchema(); + + case BoolField f -> new JsonBooleanSchema(); + + // fall-back to SolrException + default -> throw unsupportedFieldTypeException; + }; + } + + private static void 
validatePromptPlaceholders(String prompt, List inputFields) { + Set promptPlaceholders = new HashSet<>(); + Matcher matcher = PLACEHOLDER_PATTERN.matcher(prompt); + while (matcher.find()) { + promptPlaceholders.add(matcher.group(1)); + } + + Set fieldsWithoutPlaceholderInPrompt = new HashSet<>(inputFields); + fieldsWithoutPlaceholderInPrompt.removeAll(promptPlaceholders); + if (!fieldsWithoutPlaceholderInPrompt.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt is missing placeholders for inputField(s): " + fieldsWithoutPlaceholderInPrompt); + } + + Set placeholdersInPromptWithoutField = new HashSet<>(promptPlaceholders); + placeholdersInPromptWithoutField.removeAll(new HashSet<>(inputFields)); + if (!placeholdersInPromptWithoutField.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt contains placeholders not declared as inputField(s): " + + placeholdersInPromptWithoutField); + } + } + + public List getInputFields() { + return inputFields; + } + + public String getOutputField() { + return outputField; + } + + public String getPrompt() { + return promptText; + } + + public String getModelName() { + return modelName; + } + + public String getPromptFile() { + return promptFile; + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java index f8c35906093c..68f2e17e85a9 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java @@ -24,7 +24,7 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrResourceLoader; import 
org.apache.solr.languagemodels.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.TextToVectorModelStore; import org.apache.solr.languagemodels.update.processor.TextToVectorUpdateProcessor; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; @@ -44,7 +44,7 @@ *

The parameters supported are: * *

- * <processor class="solr.llm.textvectorisation.update.processor.TextToVectorUpdateProcessorFactory">
+ * <processor class="solr.languagemodels.update.processor.factory.TextToVectorUpdateProcessorFactory">
  *   <str name="inputField">textualField</str>
  *   <str name="outputField">vectorField</str>
  *   <str name="model">textToVectorModel</str>
@@ -58,7 +58,7 @@ public class TextToVectorUpdateProcessorFactory extends UpdateRequestProcessorFa
   private static final String INPUT_FIELD_PARAM = "inputField";
   private static final String OUTPUT_FIELD_PARAM = "outputField";
   private static final String MODEL_NAME = "model";
-  private ManagedTextToVectorModelStore modelStore = null;
+  private TextToVectorModelStore modelStore = null;
 
   private String inputField;
   private String outputField;
@@ -77,14 +77,14 @@ public void init(final NamedList args) {
   @Override
   public void inform(SolrCore core) {
     final SolrResourceLoader solrResourceLoader = core.getResourceLoader();
-    ManagedTextToVectorModelStore.registerManagedTextToVectorModelStore(solrResourceLoader, this);
+    TextToVectorModelStore.registerManagedTextToVectorModelStore(solrResourceLoader, this);
   }
 
   @Override
   public void onManagedResourceInitialized(NamedList args, ManagedResource res)
       throws SolrException {
-    if (res instanceof ManagedTextToVectorModelStore) {
-      modelStore = (ManagedTextToVectorModelStore) res;
+    if (res instanceof TextToVectorModelStore) {
+      modelStore = (TextToVectorModelStore) res;
     }
     if (modelStore != null) {
       modelStore.loadStoredModels();
@@ -104,8 +104,7 @@ public UpdateRequestProcessor getInstance(
     final SchemaField outputFieldSchema = latestSchema.getField(outputField);
     assertIsDenseVectorField(outputFieldSchema);
 
-    ManagedTextToVectorModelStore modelStore =
-        ManagedTextToVectorModelStore.getManagedModelStore(req.getCore());
+    TextToVectorModelStore modelStore = TextToVectorModelStore.getManagedModelStore(req.getCore());
     SolrTextToVectorModel textToVector = modelStore.getModel(modelName);
     if (textToVector == null) {
       throw new SolrException(
@@ -113,7 +112,7 @@ public UpdateRequestProcessor getInstance(
           "The model configured in the Update Request Processor '"
               + modelName
               + "' can't be found in the store: "
-              + ManagedTextToVectorModelStore.REST_END_POINT);
+              + TextToVectorModelStore.REST_END_POINT);
     }
 
     return new TextToVectorUpdateProcessor(inputField, outputField, textToVector, req, next);
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/anthropic-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/anthropic-model.json
new file mode 100644
index 000000000000..88e58edcf473
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/anthropic-model.json
@@ -0,0 +1,13 @@
+{
+  "class": "dev.langchain4j.model.anthropic.AnthropicChatModel",
+  "name": "anthropic-1",
+  "params": {
+    "baseUrl": "https://api.anthropic.com/v1",
+    "apiKey": "apiKey-anthropic",
+    "modelName": "claude-3-5-haiku-latest",
+    "timeout": 60,
+    "logRequests": true,
+    "logResponses": true,
+    "maxRetries": 5
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model-ambiguous.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model-ambiguous.json
new file mode 100644
index 000000000000..d7e0e2b98784
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model-ambiguous.json
@@ -0,0 +1,8 @@
+{
+  "class": "org.apache.solr.languagemodels.model.DummyChatModel",
+  "name": "dummy-1",
+  "params": {
+    "response": "enriched content",
+    "ambiguousTypeParam": 10
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model-unsupported.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model-unsupported.json
new file mode 100644
index 000000000000..36905c9d1ca7
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model-unsupported.json
@@ -0,0 +1,8 @@
+{
+  "class": "org.apache.solr.languagemodels.model.DummyChatModel",
+  "name": "dummy-1",
+  "params": {
+    "response": "enriched content",
+    "unsupportedParam": 10
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model.json
new file mode 100644
index 000000000000..b8d6b5c20514
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/dummy-model.json
@@ -0,0 +1,7 @@
+{
+  "class": "org.apache.solr.languagemodels.model.DummyChatModel",
+  "name": "dummy-1",
+  "params": {
+    "response": "{\"value\": \"enriched content\"}"
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/exception-throwing-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/exception-throwing-model.json
new file mode 100644
index 000000000000..8c3954701225
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/exception-throwing-model.json
@@ -0,0 +1,6 @@
+{
+  "class": "org.apache.solr.languagemodels.model.ExceptionThrowingChatModel",
+  "name": "exception-throwing-model",
+  "params": {
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/gemini-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/gemini-model.json
new file mode 100644
index 000000000000..f075a1e402d4
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/gemini-model.json
@@ -0,0 +1,12 @@
+{
+  "class": "dev.langchain4j.model.googleai.GoogleAiGeminiChatModel",
+  "name": "gemini-1",
+  "params": {
+    "apiKey": "apiKey-gemini",
+    "modelName": "gemini-2.0-flash",
+    "timeout": 60,
+    "logRequests": true,
+    "logResponses": true,
+    "maxRetries": 5
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/mistralai-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/mistralai-model.json
new file mode 100644
index 000000000000..ac75dbf1a2b5
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/mistralai-model.json
@@ -0,0 +1,13 @@
+{
+  "class": "dev.langchain4j.model.mistralai.MistralAiChatModel",
+  "name": "mistralai-1",
+  "params": {
+    "baseUrl": "https://api.mistral.ai/v1",
+    "apiKey": "apiKey-mistralAI",
+    "modelName": "mistral-small-latest",
+    "timeout": 60,
+    "logRequests": true,
+    "logResponses": true,
+    "maxRetries": 5
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/not-a-chat-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/not-a-chat-model.json
new file mode 100644
index 000000000000..f223c61fb352
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/not-a-chat-model.json
@@ -0,0 +1,7 @@
+{
+  "class": "com.example.NonExistentChatModel",
+  "name": "not-a-field-generation-model-1",
+  "params": {
+    "apiKey": "test-api-key"
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/ollama-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/ollama-model.json
new file mode 100644
index 000000000000..bb6f8081efe9
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/ollama-model.json
@@ -0,0 +1,11 @@
+{
+  "class": "dev.langchain4j.model.ollama.OllamaChatModel",
+  "name": "ollama-1",
+  "params": {
+    "baseUrl": "http://localhost:11434",
+    "modelName": "llama3.2",
+    "timeout": 60,
+    "logRequests": true,
+    "logResponses": true
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/largeLanguageModelExamples/openai-model.json b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/openai-model.json
new file mode 100644
index 000000000000..74ffde65e3b6
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/largeLanguageModelExamples/openai-model.json
@@ -0,0 +1,13 @@
+{
+  "class": "dev.langchain4j.model.openai.OpenAiChatModel",
+  "name": "openai-1",
+  "params": {
+    "baseUrl": "https://api.openai.com/v1",
+    "apiKey": "apiKey-openAI",
+    "modelName": "gpt-5.4-nano",
+    "timeout": 60,
+    "logRequests": true,
+    "logResponses": true,
+    "maxRetries": 5
+  }
+}
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt
new file mode 100644
index 000000000000..65c2f125e36c
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt
@@ -0,0 +1 @@
+Title: {string_field}. Body: {body_field}.
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt
new file mode 100644
index 000000000000..c43c5399dc07
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt
@@ -0,0 +1 @@
+Summarize this content without the placeholder.
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt
new file mode 100644
index 000000000000..502449a5cf5d
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt
@@ -0,0 +1 @@
+Summarize this content: {string_field}
\ No newline at end of file
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml
index ef93fbc057dd..4ff52fb1340f 100644
--- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml
@@ -25,7 +25,11 @@
   
   
   
-  
+  
+  
+  
+  
+  
   
   
 
@@ -36,11 +40,49 @@
   
   
   
+  
+  
+  
+  
+  
+
+  
+  
+  
+  
+  
+  
+  
+
+  
+  
+  
+  
+  
+  
+  
+
+  
+  
+
+  
+  
+  
 
   
   
   
   
+  
+
+  
+  
+  
+  
+    
+      
+    
+  
 
   
     
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml
new file mode 100644
index 000000000000..d19dfc4d787c
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml
@@ -0,0 +1,62 @@
+
+
+
+
+    ${tests.luceneMatchVersion:LATEST}
+ ${solr.data.dir:}
+ 
+ 
+
+ 
+   
+ 
+
+ 
+  
+ 
+ 
+
+ 
+  
+   15000
+   false
+  
+  
+   1000
+  
+  
+   ${solr.data.dir:}
+  
+ 
+
+ 
+  
+   explicit
+   json
+   true
+   id
+  
+ 
+
+ 
+   
+   string_field
+   enriched_field
+   Summarize this content: {string_field}
+   dummy-1
+  
+  
+ 
+
+
diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml
new file mode 100644
index 000000000000..dfcd2b1327b0
--- /dev/null
+++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml
@@ -0,0 +1,258 @@
+
+
+
+
+    ${tests.luceneMatchVersion:LATEST}
+ ${solr.data.dir:}
+ 
+ 
+
+ 
+   
+ 
+
+ 
+  
+ 
+ 
+
+ 
+  
+   15000
+   false
+  
+  
+   1000
+  
+  
+   ${solr.data.dir:}
+  
+ 
+
+ 
+  
+   explicit
+   json
+   true
+   id
+  
+ 
+
+ 
+  
+   string_field
+   enriched_field
+   Summarize this content: {string_field}
+   dummy-1
+  
+  
+ 
+
+  
+    
+      
+        string_field
+        body_field
+      
+      enriched_field
+      Title: {string_field}. Body: {body_field}.
+      dummy-1
+    
+    
+  
+
+ 
+   
+   string_field
+   enriched_field
+   Summarize this content: {string_field}
+   exception-throwing-model
+  
+  
+ 
+
+ 
+  
+   
+   string_field
+   enriched_field
+   Summarize this content: {string_field}
+   dummy-1
+  
+  
+ 
+
+ 
+   
+   string_field
+   body_field
+   enriched_field
+   Title: {string_field}. Body: {body_field}.
+   dummy-1
+  
+  
+ 
+
+ 
+   
+   string_field
+   enriched_field_multi
+   Extract tags from: {string_field}
+   dummy-multivalued-1
+  
+  
+ 
+
+ 
+   
+   string_field
+   body_field
+   enriched_field
+   Title: {string_field}. Body: {body_field}.
+   exception-throwing-model
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_long
+   Extract a number from: {string_field}
+   dummy-long
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_int
+   Extract a number from: {string_field}
+   dummy-int
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_float
+   Extract a number from: {string_field}
+   dummy-float
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_double
+   Extract a number from: {string_field}
+   dummy-double
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_boolean
+   Is this true or false: {string_field}
+   dummy-boolean
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_date
+   Extract a date from: {string_field}
+   dummy-date
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_long_multi
+   Extract numbers from: {string_field}
+   dummy-long-multi
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_int_multi
+   Extract numbers from: {string_field}
+   dummy-int-multi
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_float_multi
+   Extract numbers from: {string_field}
+   dummy-float-multi
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_double_multi
+   Extract numbers from: {string_field}
+   dummy-double-multi
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_boolean_multi
+   Extract boolean values from: {string_field}
+   dummy-boolean-multi
+  
+  
+ 
+
+ 
+   
+   string_field
+   output_date_multi
+   Extract dates from: {string_field}
+   dummy-date-multi
+  
+  
+ 
+
+  
+    
+      tags_field
+      enriched_field
+      Classify these tags: {tags_field}
+      dummy-1
+    
+    
+  
+
+
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java
index f555f2e3b973..99d241fdde58 100644
--- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java
@@ -26,7 +26,8 @@
 import java.util.List;
 import org.apache.commons.io.file.PathUtils;
 import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore;
+import org.apache.solr.languagemodels.store.rest.LargeLanguageModelStore;
+import org.apache.solr.languagemodels.store.rest.TextToVectorModelStore;
 import org.apache.solr.util.RestTestBase;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -40,10 +41,13 @@ public class TestLanguageModelBase extends RestTestBase {
 
   public static final String TEXT_TO_VECTOR_MODEL_FILE_NAME =
       "_schema_text-to-vector-model-store.json";
+  public static final String LARGE_LANGUAGE_MODEL_FILE_NAME =
+      "_schema_large-language-model-store.json";
   protected static final String COLLECTION = "collection1";
   protected static final String CONF_DIR = COLLECTION + "/conf";
 
   protected static Path textToVectorModelStoreFile = null;
+  protected static Path largeLanguageModelStoreFile = null;
 
   protected static String IDField = "id";
   protected static String vectorField = "vector";
@@ -62,17 +66,30 @@ protected static void initFolders(boolean isPersistent) throws Exception {
     tmpSolrHome = createTempDir();
     tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
     PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
-    final Path textToVectorStore = tmpConfDir.resolve(TEXT_TO_VECTOR_MODEL_FILE_NAME);
+    final Path largeLanguageModelStore = tmpConfDir.resolve(LARGE_LANGUAGE_MODEL_FILE_NAME);
+    final Path textToVectorModelStore = tmpConfDir.resolve(TEXT_TO_VECTOR_MODEL_FILE_NAME);
 
     if (isPersistent) {
-      textToVectorModelStoreFile = textToVectorStore;
+      textToVectorModelStoreFile = textToVectorModelStore;
+      largeLanguageModelStoreFile = largeLanguageModelStore;
     }
 
-    if (Files.exists(textToVectorStore)) {
+    if (Files.exists(textToVectorModelStore)) {
       if (log.isInfoEnabled()) {
-        log.info("remove model store config file in {}", textToVectorStore.toAbsolutePath());
+        log.info("remove model store config file in {}", textToVectorModelStore.toAbsolutePath());
       }
-      Files.delete(textToVectorStore);
+      Files.delete(textToVectorModelStore);
+    }
+
+    if (Files.exists(largeLanguageModelStore)) {
+
+      if (log.isInfoEnabled()) {
+        log.info(
+            "remove large language model store config file in {}",
+            largeLanguageModelStore.toAbsolutePath());
+      }
+
+      Files.delete(largeLanguageModelStore);
     }
 
     System.setProperty("managed.schema.mutable", "true");
@@ -87,24 +104,50 @@ protected static void afterTest() throws Exception {
     }
   }
 
-  public static void loadModel(String fileName, String status) throws Exception {
+  public static void loadTextToVectorModel(String fileName, String status) throws Exception {
     final URL url =
         TestLanguageModelBase.class.getResource("/textToVectorModelExamples/" + fileName);
     final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
 
     assertJPut(
-        ManagedTextToVectorModelStore.REST_END_POINT,
-        multipleModels,
-        "/responseHeader/status==" + status);
+        TextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==" + status);
   }
 
-  public static void loadModel(String fileName) throws Exception {
+  public static void loadTextToVectorModel(String fileName) throws Exception {
     final URL url =
         TestLanguageModelBase.class.getResource("/textToVectorModelExamples/" + fileName);
     final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
 
+    assertJPut(TextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0");
+  }
+
+  public static void loadLargeLanguageModel(String fileName, String status) throws Exception {
+    final URL url =
+        TestLanguageModelBase.class.getResource("/largeLanguageModelExamples/" + fileName);
+    final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
+
+    assertJPut(LargeLanguageModelStore.REST_END_POINT, model, "/responseHeader/status==" + status);
+  }
+
+  public static void loadLargeLanguageModel(String fileName, String status, String message)
+      throws Exception {
+    final URL url =
+        TestLanguageModelBase.class.getResource("/largeLanguageModelExamples/" + fileName);
+    final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
+
     assertJPut(
-        ManagedTextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0");
+        LargeLanguageModelStore.REST_END_POINT,
+        model,
+        "/responseHeader/status==" + status,
+        message);
+  }
+
+  public static void loadLargeLanguageModel(String fileName) throws Exception {
+    final URL url =
+        TestLanguageModelBase.class.getResource("/largeLanguageModelExamples/" + fileName);
+    final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8);
+
+    assertJPut(LargeLanguageModelStore.REST_END_POINT, model, "/responseHeader/status==0");
   }
 
   protected static void prepareIndex() throws Exception {
diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyChatModel.java
new file mode 100644
index 000000000000..89feb4b59e61
--- /dev/null
+++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyChatModel.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.model;
+
+import dev.langchain4j.data.message.AiMessage;
+import dev.langchain4j.data.message.UserMessage;
+import dev.langchain4j.model.chat.ChatModel;
+import dev.langchain4j.model.chat.request.ChatRequest;
+import dev.langchain4j.model.chat.response.ChatResponse;
+
+/**
+ * A deterministic {@link ChatModel} for testing. It returns a fixed response string regardless of
+ * the input, allowing tests to assert exact enriched-field values without real API calls.
+ *
+ *
+ * <p>
The builder also exposes {@code unsupported} and {@code ambiguous} setter methods to exercise + * the reflection-based parameter handling in {@link SolrLargeLanguageModel#getInstance}. + */ +public class DummyChatModel implements ChatModel { + + /** The text of the last prompt received by any instance. Useful for test assertions. */ + public static String lastReceivedPrompt; + + private final String response; + + public DummyChatModel(String response) { + this.response = response; + } + + @Override + public ChatResponse chat(ChatRequest chatRequest) { + lastReceivedPrompt = ((UserMessage) chatRequest.messages().getFirst()).singleText(); + return ChatResponse.builder().aiMessage(AiMessage.from(response)).build(); + } + + public static DummyChatModelBuilder builder() { + return new DummyChatModelBuilder(); + } + + public static class DummyChatModelBuilder { + private String response = "dummy response"; + private int intValue; + + public DummyChatModelBuilder() {} + + public DummyChatModelBuilder response(String response) { + this.response = response; + return this; + } + + /** Intentionally has no String overload so the reflection code raises a BAD_REQUEST error. */ + public DummyChatModelBuilder unsupportedParam(Integer input) { + return this; + } + + /** Two overloads make this param "ambiguous": the reflection code should default to String. 
*/ + public DummyChatModelBuilder ambiguousTypeParam(int input) { + this.intValue = input; + return this; + } + + public DummyChatModelBuilder ambiguousTypeParam(String input) { + this.intValue = Integer.valueOf(input); + return this; + } + + public DummyChatModel build() { + return new DummyChatModel(this.response); + } + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyChatModelTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyChatModelTest.java new file mode 100644 index 000000000000..70cd5d8e7a93 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyChatModelTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.model; + +import dev.langchain4j.data.message.UserMessage; +import dev.langchain4j.model.chat.request.ChatRequest; +import org.apache.solr.SolrTestCase; +import org.junit.Test; + +public class DummyChatModelTest extends SolrTestCase { + + @Test + public void constructAndChat() throws Exception { + assertEquals( + "hello world", + new DummyChatModel("hello world") + .chat(ChatRequest.builder().messages(UserMessage.from("any input")).build()) + .aiMessage() + .text()); + assertEquals( + "fixed response", + new DummyChatModel("fixed response") + .chat(ChatRequest.builder().messages(UserMessage.from("another input")).build()) + .aiMessage() + .text()); + assertEquals( + "dummy response", + DummyChatModel.builder() + .build() + .chat(ChatRequest.builder().messages(UserMessage.from("default")).build()) + .aiMessage() + .text()); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/ExceptionThrowingChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/ExceptionThrowingChatModel.java new file mode 100644 index 000000000000..0873b8ced42b --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/ExceptionThrowingChatModel.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.model; + +import dev.langchain4j.model.chat.ChatModel; +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.response.ChatResponse; + +/** + * A {@link ChatModel} that always throws a {@link RuntimeException}. Used to verify that {@link + * org.apache.solr.languagemodels.update.processor.DocumentEnrichmentUpdateProcessor} handles + * chat-model failures gracefully (logs the error and continues indexing without the enriched + * field). + */ +public class ExceptionThrowingChatModel implements ChatModel { + + @Override + public ChatResponse chat(ChatRequest chatRequest) { + throw new RuntimeException("Failed to enrich"); + } + + public static ExceptionThrowingChatModelBuilder builder() { + return new ExceptionThrowingChatModelBuilder(); + } + + public static class ExceptionThrowingChatModelBuilder { + + public ExceptionThrowingChatModelBuilder() {} + + public ExceptionThrowingChatModel build() { + return new ExceptionThrowingChatModel(); + } + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java index 3f0b0a1f0c6d..ee58c8d1bd3d 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java @@ -28,7 +28,7 @@ public class 
TextToVectorQParserTest extends TestLanguageModelBase { @BeforeClass public static void init() throws Exception { setupTest("solrconfig-language-models.xml", "schema-language-models.xml", true, false); - loadModel("dummy-model.json"); + loadTextToVectorModel("dummy-model.json"); } @AfterClass diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelManager.java new file mode 100644 index 000000000000..e0715ce4193e --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelManager.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.store.rest; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.apache.solr.languagemodels.update.processor.factory.DocumentEnrichmentUpdateProcessorFactory; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceStorage; +import org.apache.solr.rest.RestManager; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestLargeLanguageModelManager extends TestLanguageModelBase { + + @BeforeClass + public static void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + } + + @AfterClass + public static void cleanup() throws Exception { + afterTest(); + } + + @Test + public void test() throws Exception { + final SolrResourceLoader loader = new SolrResourceLoader(tmpSolrHome); + + final RestManager.Registry registry = loader.getManagedResourceRegistry(); + assertNotNull( + "Expected a non-null RestManager.Registry from the SolrResourceLoader!", registry); + + final String resourceId = "/schema/mstore1"; + registry.registerManagedResource( + resourceId, LargeLanguageModelStore.class, new DocumentEnrichmentUpdateProcessorFactory()); + + final NamedList initArgs = new NamedList<>(); + + final RestManager restManager = new RestManager(); + restManager.init(loader, initArgs, new ManagedResourceStorage.InMemoryStorageIO()); + + final ManagedResource res = restManager.getManagedResource(resourceId); + assertTrue(res instanceof LargeLanguageModelStore); + assertEquals(res.getResourceId(), resourceId); + } + + @Test + public void testRestManagerEndpoints() throws Exception { + assertJQ("/schema/managed", "/responseHeader/status==0"); + + final String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel"; + + // success + String model = + "{ 
name:\"test-model-1\", class:\"" + + openAiClassName + + "\"," + + "params:{" + + "baseUrl:\"https://api.openai.com/v1\"," + + "apiKey:\"testApiKey2\"," + + "modelName:\"gpt-4o-mini\"," + + "logRequests:true," + + "logResponses:false" + + "}}"; + assertJPut(LargeLanguageModelStore.REST_END_POINT, model, "/responseHeader/status==0"); + + // success — multiple models in one PUT + final String multipleModels = + "[{ name:\"test-model-2\", class:\"" + + openAiClassName + + "\"," + + "params:{baseUrl:\"https://api.openai.com/v1\"," + + "apiKey:\"testApiKey3\"," + + "modelName:\"gpt-4o-mini\"," + + "logRequests:true," + + "logResponses:false" + + "}}\n" + + ",{ name:\"test-model-3\", class:\"" + + openAiClassName + + "\"," + + "params:{baseUrl:\"https://api.openai.com/v1\"," + + "apiKey:\"testApiKey4\"," + + "modelName:\"gpt-4o-mini\"," + + "logRequests:true," + + "logResponses:false" + + "}}]"; + assertJPut(LargeLanguageModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0"); + + final String qryResult = JQ(LargeLanguageModelStore.REST_END_POINT); + assertTrue( + qryResult.contains("\"name\":\"test-model-1\"") + && qryResult.contains("\"name\":\"test-model-2\"") + && qryResult.contains("\"name\":\"test-model-3\"")); + + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='test-model-1'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[1]/name=='test-model-2'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[2]/name=='test-model-3'"); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/test-model-1"); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/test-model-2"); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/test-model-3"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models==[]"); + } + + @Test + public void loadFieldGenerationModel_openAi_shouldLoadModelConfig() throws Exception { + loadLargeLanguageModel("openai-model.json"); + + final String 
modelName = "openai-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadFieldGenerationModel_mistralAi_shouldLoadModelConfig() throws Exception { + loadLargeLanguageModel("mistralai-model.json"); + + final String modelName = "mistralai-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.mistral.ai/v1'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-mistralAI'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='mistral-small-latest'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void 
loadFieldGenerationModel_anthropic_shouldLoadModelConfig() throws Exception { + loadLargeLanguageModel("anthropic-model.json"); + + final String modelName = "anthropic-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.anthropic.com/v1'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-anthropic'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='claude-3-5-haiku-latest'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadFieldGenerationModel_ollama_shouldLoadModelConfig() throws Exception { + loadLargeLanguageModel("ollama-model.json"); + + final String modelName = "ollama-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='http://localhost:11434'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/modelName=='llama3.2'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadFieldGenerationModel_gemini_shouldLoadModelConfig() throws Exception 
{ + loadLargeLanguageModel("gemini-model.json"); + + final String modelName = "gemini-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-gemini'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gemini-2.0-flash'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadFieldGenerationModel_dummyUnsupportedParam_shouldRaiseError() throws Exception { + loadLargeLanguageModel( + "dummy-model-unsupported.json", + "400", + "/error/msg=='org.apache.solr.languagemodels.LanguageModelException: Model loading failed for org.apache.solr.languagemodels.model.DummyChatModel'"); + } + + @Test + public void loadFieldGenerationModel_notAChatModel_shouldRaiseError() throws Exception { + loadLargeLanguageModel( + "not-a-chat-model.json", + "400", + "/error/msg=='org.apache.solr.languagemodels.LanguageModelException: Model loading failed for com.example.NonExistentChatModel'"); + } + + @Test + public void loadFieldGenerationModel_dummyAmbiguousParam_shouldDefaultToString() + throws Exception { + loadLargeLanguageModel("dummy-model-ambiguous.json"); + + final String modelName = "dummy-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/ambiguousTypeParam==10"); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + } +} diff --git 
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelManagerPersistence.java new file mode 100644 index 000000000000..9d9e4084dac8 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelManagerPersistence.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.store.rest; + +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestLargeLanguageModelManagerPersistence extends TestLanguageModelBase { + + @Before + public void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, true); + } + + @After + public void cleanup() throws Exception { + afterTest(); + } + + @Test + public void testLargeLanguageModelStorePersistence() throws Exception { + // check store is empty at start + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/==[]"); + + // load a model + loadLargeLanguageModel("openai-model.json"); + + final String modelName = "openai-1"; + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + // check persistence after reload + restTestHarness.reload(); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, 
"/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + // check persistence after restart + restartJetty(); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + + // delete model and verify persistence of the empty state + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + modelName); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/==[]"); + + restTestHarness.reload(); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/==[]"); + + restartJetty(); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models/==[]"); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelStoreInitialization.java new file mode 100644 index 000000000000..902723b4af9f --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestLargeLanguageModelStoreInitialization.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.store.rest; + +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.junit.After; +import org.junit.Test; + +public class TestLargeLanguageModelStoreInitialization extends TestLanguageModelBase { + + @After + public void cleanUp() throws Exception { + afterTest(); + } + + @Test + public void largeLanguageModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized() + throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/responseHeader/status==0"); + assertJQ(LargeLanguageModelStore.REST_END_POINT, "/models==[]"); + } + + @Test + public void largeLanguageModelStore_whenNoComponents_shouldNotBeInitialized() throws Exception { + setupTest( + "solrconfig-language-models-no-components.xml", "schema-language-models.xml", false, false); + assertJQ( + LargeLanguageModelStore.REST_END_POINT, + "/responseHeader/status==400", + "/error/msg=='No REST managed resource registered for path " + + LargeLanguageModelStore.REST_END_POINT + + "'"); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManagerPersistence.java deleted file mode 100644 index 21f8ba4b6dc9..000000000000 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManagerPersistence.java +++ /dev/null @@ -1,109 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.languagemodels.store.rest; - -import org.apache.solr.languagemodels.TestLanguageModelBase; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class TestModelManagerPersistence extends TestLanguageModelBase { - - @Before - public void init() throws Exception { - setupTest("solrconfig-language-models.xml", "schema-language-models.xml", false, true); - } - - @After - public void cleanup() throws Exception { - afterTest(); - } - - @Test - public void testModelStorePersistence() throws Exception { - // check models are empty - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/==[]"); - - // load models and features from files - loadModel("cohere-model.json"); - - final String modelName = "cohere-1"; - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - 
"/models/[0]/params/modelName=='embed-english-light-v3.0'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/inputType=='search_document'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); - - // check persistence after reload - restTestHarness.reload(); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/modelName=='embed-english-light-v3.0'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/inputType=='search_document'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); - - // check persistence after restart - restartJetty(); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/modelName=='embed-english-light-v3.0'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - 
"/models/[0]/params/inputType=='search_document'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); - - // delete loaded models and features - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/" + modelName); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/==[]"); - - // check persistence after reload - restTestHarness.reload(); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/==[]"); - - // check persistence after restart - restartJetty(); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/==[]"); - } -} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelManager.java similarity index 50% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManager.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelManager.java index b3dd51a59445..0dba83a8d97b 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelManager.java @@ -27,7 +27,7 @@ import org.junit.BeforeClass; import org.junit.Test; -public class TestModelManager extends TestLanguageModelBase { +public class TestTextToVectorModelManager extends TestLanguageModelBase { @BeforeClass public static void init() throws Exception { @@ -49,7 +49,7 @@ public void test() throws Exception { final String resourceId = "/schema/mstore1"; registry.registerManagedResource( - resourceId, 
ManagedTextToVectorModelStore.class, new TextToVectorQParserPlugin()); + resourceId, TextToVectorModelStore.class, new TextToVectorQParserPlugin()); final NamedList initArgs = new NamedList<>(); @@ -57,7 +57,7 @@ public void test() throws Exception { restManager.init(loader, initArgs, new ManagedResourceStorage.InMemoryStorageIO()); final ManagedResource res = restManager.getManagedResource(resourceId); - assertTrue(res instanceof ManagedTextToVectorModelStore); + assertTrue(res instanceof TextToVectorModelStore); assertEquals(res.getResourceId(), resourceId); } @@ -70,7 +70,7 @@ public void testRestManagerEndpoints() throws Exception { // Add models String model = "{ \"name\":\"testModel1\", \"class\":\"" + cohereModelClassName + "\"}"; // fails since it does not have params - assertJPut(ManagedTextToVectorModelStore.REST_END_POINT, model, "/responseHeader/status==400"); + assertJPut(TextToVectorModelStore.REST_END_POINT, model, "/responseHeader/status==400"); // success model = "{ name:\"testModel2\", class:\"" @@ -84,7 +84,7 @@ public void testRestManagerEndpoints() throws Exception { + "logRequests:true," + "logResponses:false" + "}}"; - assertJPut(ManagedTextToVectorModelStore.REST_END_POINT, model, "/responseHeader/status==0"); + assertJPut(TextToVectorModelStore.REST_END_POINT, model, "/responseHeader/status==0"); // success final String multipleModels = "[{ name:\"testModel3\", class:\"" @@ -107,122 +107,116 @@ public void testRestManagerEndpoints() throws Exception { + "logRequests:true," + "logResponses:false" + "}}]"; - assertJPut( - ManagedTextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0"); - final String qryResult = JQ(ManagedTextToVectorModelStore.REST_END_POINT); + assertJPut(TextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0"); + final String qryResult = JQ(TextToVectorModelStore.REST_END_POINT); assertTrue( qryResult.contains("\"name\":\"testModel2\"") && 
qryResult.contains("\"name\":\"testModel3\"") && qryResult.contains("\"name\":\"testModel4\"")); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='testModel2'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[1]/name=='testModel3'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[2]/name=='testModel4'"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/testModel2"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/testModel3"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/testModel4"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='testModel2'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[1]/name=='testModel3'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[2]/name=='testModel4'"); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/testModel2"); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/testModel3"); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/testModel4"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models==[]'"); } @Test - public void loadModel_cohere_shouldLoadModelConfig() throws Exception { - loadModel("cohere-model.json"); + public void loadTextToVectorModel_cohere_shouldLoadModelConfig() throws Exception { + loadTextToVectorModel("cohere-model.json"); final String modelName = "cohere-1"; - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, 
"/models/[0]/params/apiKey=='apiKey-cohere'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/modelName=='embed-english-light-v3.0'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/inputType=='search_document'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/inputType=='search_document'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/" + modelName); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/" + modelName); } @Test - public void loadModel_openAi_shouldLoadModelConfig() throws Exception { - loadModel("openai-model.json"); + public void loadTextToVectorModel_openAi_shouldLoadModelConfig() throws Exception { + loadTextToVectorModel("openai-model.json"); final String modelName = "openai-1"; - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); assertJQ( - 
ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); - assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/modelName=='text-embedding-3-small'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/" + modelName); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/" + modelName); } @Test - public void loadModel_mistralAi_shouldLoadModelConfig() throws Exception { - loadModel("mistralai-model.json"); + public void loadTextToVectorModel_mistralAi_shouldLoadModelConfig() throws Exception { + loadTextToVectorModel("mistralai-model.json"); final String modelName = "mistralai-1"; - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/baseUrl=='https://api.mistral.ai/v1'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/apiKey=='apiKey-mistralAI'"); + TextToVectorModelStore.REST_END_POINT, 
"/models/[0]/params/apiKey=='apiKey-mistralAI'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, - "/models/[0]/params/modelName=='mistral-embed'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); - - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/" + modelName); + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/modelName=='mistral-embed'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/" + modelName); } @Test - public void loadModel_huggingface_shouldLoadModelConfig() throws Exception { - loadModel("huggingface-model.json"); + public void loadTextToVectorModel_huggingface_shouldLoadModelConfig() throws Exception { + loadTextToVectorModel("huggingface-model.json"); final String modelName = "huggingface-1"; - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/accessToken=='apiKey-huggingface'"); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/modelId=='sentence-transformers/all-MiniLM-L6-v2'"); - 
restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/" + modelName); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/" + modelName); } @Test - public void loadModel_dummyUnsupportedParam_shouldRaiseError() throws Exception { - loadModel("dummy-model-unsupported.json", "400"); + public void loadTextToVectorModel_dummyUnsupportedParam_shouldRaiseError() throws Exception { + loadTextToVectorModel("dummy-model-unsupported.json", "400"); } @Test - public void loadModel_dummyAmbiguousParam_shouldDefaultToString() throws Exception { - loadModel("dummy-model-ambiguous.json"); + public void loadTextToVectorModel_dummyAmbiguousParam_shouldDefaultToString() throws Exception { + loadTextToVectorModel("dummy-model-ambiguous.json"); final String modelName = "dummy-1"; - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models/[0]/params/ambiguous==10"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/ambiguous==10"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/" + modelName); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/" + modelName); } } diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelManagerPersistence.java new file mode 100644 index 000000000000..5a5189408ef0 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelManagerPersistence.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.store.rest; + +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestTextToVectorModelManagerPersistence extends TestLanguageModelBase { + + @Before + public void init() throws Exception { + setupTest("solrconfig-language-models.xml", "schema-language-models.xml", false, true); + } + + @After + public void cleanup() throws Exception { + afterTest(); + } + + @Test + public void testTextToVectorModelStorePersistence() throws Exception { + // check models are empty + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/==[]"); + + // load models and features from files + loadTextToVectorModel("cohere-model.json"); + + final String modelName = "cohere-1"; + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='embed-english-light-v3.0'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, 
"/models/[0]/params/inputType=='search_document'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + + // check persistence after reload + restTestHarness.reload(); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='embed-english-light-v3.0'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/inputType=='search_document'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + + // check persistence after restart + restartJetty(); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.cohere.ai/v1/'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-cohere'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='embed-english-light-v3.0'"); + assertJQ( + TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/inputType=='search_document'"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(TextToVectorModelStore.REST_END_POINT, 
"/models/[0]/params/logResponses==true"); + + // delete loaded models and features + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/" + modelName); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/==[]"); + + // check persistence after reload + restTestHarness.reload(); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/==[]"); + + // check persistence after restart + restartJetty(); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models/==[]"); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestManagedLanguageModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelStoreInitialization.java similarity index 67% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestManagedLanguageModelStoreInitialization.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelStoreInitialization.java index 4b414a18de6e..5a51375e4f46 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestManagedLanguageModelStoreInitialization.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestTextToVectorModelStoreInitialization.java @@ -20,7 +20,7 @@ import org.junit.After; import org.junit.Test; -public class TestManagedLanguageModelStoreInitialization extends TestLanguageModelBase { +public class TestTextToVectorModelStoreInitialization extends TestLanguageModelBase { @After public void cleanUp() throws Exception { @@ -28,7 +28,7 @@ public void cleanUp() throws Exception { } @Test - public void managedModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized() + public void textToVectorModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized() throws Exception { setupTest( 
"solrconfig-language-models-update-request-processor-only.xml", @@ -36,12 +36,12 @@ public void managedModelStore_whenUpdateRequestComponentConfigured_shouldBeIniti false, false); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/responseHeader/status==0"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/responseHeader/status==0"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models==[]"); } @Test - public void managedModelStore_whenQueryParserComponentConfigured_shouldBeInitialized() + public void textToVectorModelStore_whenQueryParserComponentConfigured_shouldBeInitialized() throws Exception { setupTest( "solrconfig-language-models-query-parser-only.xml", @@ -49,19 +49,19 @@ public void managedModelStore_whenQueryParserComponentConfigured_shouldBeInitial false, false); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/responseHeader/status==0"); - assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/responseHeader/status==0"); + assertJQ(TextToVectorModelStore.REST_END_POINT, "/models==[]"); } @Test - public void managedModelStore_whenNoComponents_shouldNotBeInitialized() throws Exception { + public void textToVectorModelStore_whenNoComponents_shouldNotBeInitialized() throws Exception { setupTest( "solrconfig-language-models-no-components.xml", "schema-language-models.xml", false, false); assertJQ( - ManagedTextToVectorModelStore.REST_END_POINT, + TextToVectorModelStore.REST_END_POINT, "/responseHeader/status==400", "/error/msg=='No REST managed resource registered for path " - + ManagedTextToVectorModelStore.REST_END_POINT + + TextToVectorModelStore.REST_END_POINT + "'"); } } diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/DocumentEnrichmentUpdateProcessorTest.java 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/DocumentEnrichmentUpdateProcessorTest.java new file mode 100644 index 000000000000..d4662bdf66e4 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -0,0 +1,808 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.update.processor; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.RemoteSolrException; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.Utils; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.apache.solr.languagemodels.model.DummyChatModel; +import org.apache.solr.languagemodels.store.rest.LargeLanguageModelStore; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DocumentEnrichmentUpdateProcessorTest extends TestLanguageModelBase { + + @BeforeClass + public static void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + } + + @AfterClass + public static void cleanup() throws Exception { + afterTest(); + } + + private String loadedModelId; + + @After + public void afterEachTest() throws Exception { + if (loadedModelId != null) { + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + loadedModelId); + loadedModelId = null; + } + } + + private void loadTestLargeLanguageModel(String fileName, String modelId) throws Exception { + loadLargeLanguageModel(fileName); + loadedModelId = modelId; + } + + private void loadDummyLargeLanguageModel(String modelId, String response) throws Exception { + Map model = new LinkedHashMap<>(); + model.put("class", "org.apache.solr.languagemodels.model.DummyChatModel"); + model.put("name", modelId); + model.put("params", Map.of("response", response)); + assertJPut( + LargeLanguageModelStore.REST_END_POINT, + Utils.toJSONString(model), + "/responseHeader/status==0"); + 
loadedModelId = modelId; + } + + @Test + public void processAdd_inputField_shouldEnrichInputField() throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + /* + This test looks for the 'dummy-1' model, but such model is not loaded — + the model store is empty, so the update fails. + */ + @Test + public void processAdd_modelNotFound_shouldThrowException() { + RuntimeException thrown = + assertThrows( + "model not found should throw an exception", + RemoteSolrException.class, + () -> + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichment")); + assertTrue( + thrown + .getMessage() + .contains( + "The model configured in the Update Request Processor 'dummy-1' can't be found in the store: /schema/large-language-model-store")); + } + + @Test + public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", 
// no enriched field for doc 99 + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + @Test + public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + assertU(adoc("id", "98")); // no string_field + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 98 + } + + @Test + public void processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField() throws Exception { + loadTestLargeLanguageModel("exception-throwing-model.json", "exception-throwing-model"); + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "failingDocumentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "failingDocumentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99 + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 98 + } + + @Test + public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent() + throws Exception { + // Verifies that when using a processor chain configured for partial updates + // (i.e., DistributedUpdateProcessorFactory before DocumentEnrichmentUpdateProcessorFactory), + // the system correctly retrieves the stored 
value of string_field and generates the + // enriched content for the document. + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.")); + assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth.")); + assertU(commit()); + + SolrInputDocument atomicDoc = new SolrInputDocument(); + atomicDoc.setField("id", "99"); + atomicDoc.setField("enriched", Map.of("set", true)); + addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field==" // no enriched field for document 98 + ); + } + + @Test + public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() throws Exception { + // Verifies that when a document already contains an enriched_field and string_field is + // modified via atomic update, the enriched content is recomputed and replaces the previous + // value rather than being appended. 
+ loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + assertU( + adoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "enriched_field", + "old content")); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='old content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + + SolrInputDocument atomicDoc = new SolrInputDocument(); + atomicDoc.setField("id", "99"); + atomicDoc.setField( + "string_field", Map.of("set", "Vegeta is the saiyan prince from the Dragon Ball series.")); + addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); + assertU(commit()); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + // --- multi-field tests --- + + @Test + public void processAdd_arrInputField_shouldEnrichDocument() throws Exception { + // Verifies that in solrconfig behaves identically to + // multiple entries — both are accepted by removeConfigArgs. 
+ loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + DummyChatModel.lastReceivedPrompt = null; + + addWithChain( + sdoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "body_field", + "He is very proud."), + "documentEnrichmentArrInputField"); + addWithChain( + sdoc( + "id", + "98", + "string_field", + "Kakaroth is a saiyan.", + "body_field", + "He grew up on Earth."), + "documentEnrichmentArrInputField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + @Test + public void processAdd_multipleInputFields_allPresent_shouldEnrichDocumentWithBothFields() + throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + DummyChatModel.lastReceivedPrompt = null; + + addWithChain( + sdoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "body_field", + "He is very proud."), + "documentEnrichmentMultiField"); + addWithChain( + sdoc( + "id", + "98", + "string_field", + "Kakaroth is a saiyan.", + "body_field", + "He grew up on Earth."), + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + + // Verify both placeholders were substituted + assertEquals( + "Title: Kakaroth is a saiyan.. 
Body: He grew up on Earth..", + DummyChatModel.lastReceivedPrompt); + } + + @Test + public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() + throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + addWithChain( + sdoc("id", "99", "body_field", "He is very proud."), // string_field absent + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "body_field", "He is very jealous."), // string_field absent + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() + throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", ""), + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Goku is the best saiyan.", "body_field", ""), + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() + throws Exception { + loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField"); + addWithChain(sdoc("id", "98"), "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + 
assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() + throws Exception { + loadTestLargeLanguageModel("exception-throwing-model.json", "exception-throwing-model"); + + addWithChain( + sdoc( + "id", + "99", + "string_field", + "Vegeta is the saiyan prince.", + "body_field", + "He is very proud."), + "failingDocumentEnrichmentMultiField"); + addWithChain( + sdoc( + "id", + "98", + "string_field", + "Kakaroth is a saiyan.", + "body_field", + "He grew up on Earth."), + "failingDocumentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multivaluedInputField_shouldInterpolateCollectionAndEnrichDocument() + throws Exception { + // When an input field is multivalued, SolrInputField.getValue() returns the Collection, + // whose toString() is used for prompt interpolation (e.g. "[tag1, tag2, tag3]"). + // Enrichment must proceed — the collection is non-null and non-empty. 
+ loadTestLargeLanguageModel("dummy-model.json", "dummy-1"); + + DummyChatModel.lastReceivedPrompt = null; + + addWithChain(sdoc("id", "98", "tags_field", "tag1"), "documentEnrichmentMultivaluedInput"); + + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "99"); + doc.addField("tags_field", "tag1"); + doc.addField("tags_field", "tag2"); + doc.addField("tags_field", "tag3"); + addWithChain(doc, "documentEnrichmentMultivaluedInput"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + + assertEquals("Classify these tags: [tag1, tag2, tag3]", DummyChatModel.lastReceivedPrompt); + } + + @Test + public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichment() + throws Exception { + loadDummyLargeLanguageModel("dummy-multivalued-1", "{\"value\": [\"tag1\", \"tag2\"]}"); + + addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichmentMultivaluedString"); + addWithChain(sdoc("id", "98", "string_field", ""), "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field_multi==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field_multi=="); + } + + // --- typed single-valued output field tests --- + + @Test + public void processAdd_singleTypedOutputField_shouldPopulateValue() throws Exception { + record TypedCase( + String modelId, String response, String chain, String field, String expectedValue) {} + List typedCases = + List.of( + new TypedCase( + "dummy-long", + "{\"value\": 3000000000}", + 
"documentEnrichmentSingleLong", + "output_long", + "3000000000"), + new TypedCase( + "dummy-int", "{\"value\": 7}", "documentEnrichmentSingleInt", "output_int", "7"), + new TypedCase( + "dummy-float", + "{\"value\": 1.5}", + "documentEnrichmentSingleFloat", + "output_float", + "1.5"), + new TypedCase( + "dummy-double", + "{\"value\": 1e308}", + "documentEnrichmentSingleDouble", + "output_double", + "1e308"), + new TypedCase( + "dummy-boolean", + "{\"value\": true}", + "documentEnrichmentSingleBoolean", + "output_boolean", + "true"), + new TypedCase( + "dummy-date", + "{\"value\": \"2024-01-15T00:00:00Z\"}", + "documentEnrichmentSingleDate", + "output_date", + "'2024-01-15T00:00:00Z'")); + + for (TypedCase typedCase : typedCases) { + loadDummyLargeLanguageModel(typedCase.modelId(), typedCase.response()); + addWithChain(sdoc("id", "99", "string_field", "some content"), typedCase.chain()); + addWithChain(sdoc("id", "98", "string_field", "other content"), typedCase.chain()); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(typedCase.field()); + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/" + typedCase.field() + "==" + typedCase.expectedValue(), + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/" + typedCase.field() + "==" + typedCase.expectedValue()); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + typedCase.modelId()); + loadedModelId = null; + } + } + + // --- typed multivalued output field tests --- + + @Test + public void processAdd_multivaluedTypedOutputField_shouldPopulateAllValues() throws Exception { + record TypeCaseMulti( + String modelId, String response, String chain, String field, List expectedValues) {} + List typedCaseMultis = + List.of( + new TypeCaseMulti( + "dummy-multivalued-1", + "{\"value\": [\"tag1\", \"tag2\"]}", + "documentEnrichmentMultivaluedString", + "enriched_field_multi", + List.of("'tag1'", "'tag2'")), + 
new TypeCaseMulti( + "dummy-long-multi", + "{\"value\": [1000000000, 2000000000, 3000000000]}", + "documentEnrichmentMultivaluedLong", + "output_long_multi", + List.of("1000000000", "2000000000", "3000000000")), + new TypeCaseMulti( + "dummy-int-multi", + "{\"value\": [1, 2]}", + "documentEnrichmentMultivaluedInt", + "output_int_multi", + List.of("1", "2")), + new TypeCaseMulti( + "dummy-float-multi", + "{\"value\": [1.5, 2.5]}", + "documentEnrichmentMultivaluedFloat", + "output_float_multi", + List.of("1.5", "2.5")), + new TypeCaseMulti( + "dummy-double-multi", + "{\"value\": [1e308, 1.1e308]}", + "documentEnrichmentMultivaluedDouble", + "output_double_multi", + List.of("1e308", "1.1e308")), + new TypeCaseMulti( + "dummy-boolean-multi", + "{\"value\": [true, false]}", + "documentEnrichmentMultivaluedBoolean", + "output_boolean_multi", + List.of("true", "false")), + new TypeCaseMulti( + "dummy-date-multi", + "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}", + "documentEnrichmentMultivaluedDate", + "output_date_multi", + List.of("'2024-01-15T00:00:00Z'", "'2025-06-30T00:00:00Z'"))); + + for (TypeCaseMulti typedCase : typedCaseMultis) { + loadDummyLargeLanguageModel(typedCase.modelId(), typedCase.response()); + addWithChain(sdoc("id", "99", "string_field", "some content"), typedCase.chain()); + addWithChain(sdoc("id", "98", "string_field", "other content"), typedCase.chain()); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(typedCase.field()); + List assertions = new ArrayList<>(); + assertions.add("/response/numFound==2]"); + for (int docIdx = 0; docIdx < 2; docIdx++) { + String docId = docIdx == 0 ? 
"'99'" : "'98'"; + assertions.add("/response/docs/[" + docIdx + "]/id==" + docId); + for (int i = 0; i < typedCase.expectedValues().size(); i++) { + assertions.add( + "/response/docs/[" + + docIdx + + "]/" + + typedCase.field() + + "/[" + + i + + "]==" + + typedCase.expectedValues().get(i)); + } + } + assertJQ("/query" + query.toQueryString(), assertions.toArray(new String[0])); + + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/" + typedCase.modelId()); + loadedModelId = null; + } + } + + // --- LLM response contract violation tests --- + + @Test + public void processAdd_llmResponseMissingValueKey_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + // Model returns valid JSON but without the required "value" key + loadDummyLargeLanguageModel("dummy-1", "{\"result\": \"some value\"}"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_llmResponseMalformedJson_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + // Model returns a plain string that cannot be parsed as JSON + loadDummyLargeLanguageModel("dummy-1", "not valid json at all"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + 
query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + // --- field type incompatibility tests --- + + @Test + public void processAdd_dateOutputField_malformedDateString_shouldFailToIndex() throws Exception { + // DatePointField requires a full ISO-8601 datetime string (e.g. "2024-01-15T00:00:00Z"). + // A date-only string like "2024-01-15" (missing time component) cannot be parsed by + // DateMathParser and causes the update to fail. + // Unlike model exceptions (caught inside processAdd), this error occurs during Solr field + // conversion in super.processAdd() and propagates as a RemoteSolrException to the caller. + loadDummyLargeLanguageModel("dummy-date", "{\"value\": \"2024-01-15\"}"); + + assertThrows( + "date string without time component should fail to index", + RemoteSolrException.class, + () -> + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDate")); + } + + @Test + public void processAdd_intOutputField_decimalResponse_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + loadDummyLargeLanguageModel("dummy-int", "{\"value\": 3.7}"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleInt"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleInt"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_int"); + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/output_int==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/output_int=="); + } + + @Test + public void processAdd_doubleOutputField_intResponse_shouldConvertAndIndex() throws Exception { + loadDummyLargeLanguageModel("dummy-double", "{\"value\": 3}"); + + addWithChain( + sdoc("id", "99", "string_field", 
"some content"), "documentEnrichmentSingleDouble"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_double"); + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_double==3.0", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_double==3.0"); + } + + @Test + public void processAdd_floatOutputField_doubleResponse_shouldRoundToFloatPrecision() + throws Exception { + loadDummyLargeLanguageModel("dummy-float", "{\"value\": 3.141592653589793}"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleFloat"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_float"); + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_float==3.1415927", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_float==3.1415927"); + } + + // --- multivalued output field / scalar response test --- + + @Test + public void processAdd_multivaluedOutputField_singleValuedLlmResponse_shouldSkipEnrichment() + throws Exception { + loadDummyLargeLanguageModel("dummy-multivalued-1", "{\"value\": \"a single string\"}"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichmentMultivaluedString"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + 
"!/response/docs/[0]/enriched_field_multi==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field_multi=="); + } + + private SolrQuery getEnrichmentQuery(String enrichedFieldName) { + final SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.add("fl", "id," + enrichedFieldName); + query.add("sort", "id desc"); + return query; + } + + void addWithChain(SolrInputDocument document, String updateChain) + throws SolrServerException, IOException { + UpdateRequest req = new UpdateRequest(); + req.add(document); + req.setParam("update.chain", updateChain); + solrTestRule.getSolrClient("collection1").request(req); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java index 75a9f206f14e..2b948786d2af 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java @@ -24,7 +24,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.languagemodels.TestLanguageModelBase; -import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.TextToVectorModelStore; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -44,14 +44,14 @@ public static void cleanup() throws Exception { @After public void afterEachTest() throws Exception { - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/dummy-1"); restTestHarness.delete( - ManagedTextToVectorModelStore.REST_END_POINT + 
"/exception-throwing-model"); // clean + TextToVectorModelStore.REST_END_POINT + "/exception-throwing-model"); // clean } @Test public void processAdd_inputField_shouldVectoriseInputField() throws Exception { - loadModel("dummy-model.json"); // preparation + loadTextToVectorModel("dummy-model.json"); // preparation addWithChain(sdoc("id", "99", "_text_", "Vegeta is the saiyan prince."), "textToVector"); addWithChain( @@ -69,7 +69,7 @@ public void processAdd_inputField_shouldVectoriseInputField() throws Exception { "/response/docs/[1]/id=='98'", "/response/docs/[1]/vector==[1.0, 2.0, 3.0, 4.0]"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up } private SolrQuery getSolrQuery() { @@ -102,7 +102,7 @@ public void processAdd_modelNotFound_shouldThrowException() { @Test public void processAdd_emptyInputField_shouldLogAndIndexWithNoVector() throws Exception { - loadModel("dummy-model.json"); // preparation + loadTextToVectorModel("dummy-model.json"); // preparation addWithChain(sdoc("id", "99", "_text_", ""), "textToVector"); addWithChain(sdoc("id", "98", "_text_", "Vegeta is the saiyan prince."), "textToVector"); assertU(commit()); @@ -117,12 +117,12 @@ public void processAdd_emptyInputField_shouldLogAndIndexWithNoVector() throws Ex "/response/docs/[1]/id=='98'", "/response/docs/[1]/vector==[1.0, 2.0, 3.0, 4.0]"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up } @Test public void processAdd_nullInputField_shouldLogAndIndexWithNoVector() throws Exception { - loadModel("dummy-model.json"); // preparation + loadTextToVectorModel("dummy-model.json"); // preparation addWithChain(sdoc("id", "99", "_text_", "Vegeta is the saiyan prince."), "textToVector"); assertU(adoc("id", "98")); assertU(commit()); 
@@ -137,12 +137,12 @@ public void processAdd_nullInputField_shouldLogAndIndexWithNoVector() throws Exc "/response/docs/[1]/id=='98'", "!/response/docs/[1]/vector=="); // no vector field for the document 98 - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up } @Test public void processAdd_failingVectorisation_shouldLogAndIndexWithNoVector() throws Exception { - loadModel("exception-throwing-model.json"); // preparation + loadTextToVectorModel("exception-throwing-model.json"); // preparation addWithChain(sdoc("id", "99", "_text_", "Vegeta is the saiyan prince."), "failingTextToVector"); addWithChain( sdoc("id", "98", "_text_", "Kakaroth is a saiyan grown up on planet Earth."), @@ -160,7 +160,7 @@ public void processAdd_failingVectorisation_shouldLogAndIndexWithNoVector() thro "!/response/docs/[1]/vector=="); // no vector field for the document 98 restTestHarness.delete( - ManagedTextToVectorModelStore.REST_END_POINT + "/exception-throwing-model"); // clean up + TextToVectorModelStore.REST_END_POINT + "/exception-throwing-model"); // clean up } @Test @@ -170,7 +170,7 @@ public void processAtomicUpdate_shouldTriggerTheVectorizationAndFetchTheStoredCo // (i.e., the UpdateRequestProcessor is placed before the TextToVector processor), // the system correctly retrieves the stored value of the input field (string_field) // and generates the vector for the document. 
- loadModel("dummy-model.json"); + loadTextToVectorModel("dummy-model.json"); assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.")); assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth.")); assertU(commit()); @@ -192,7 +192,7 @@ public void processAtomicUpdate_shouldTriggerTheVectorizationAndFetchTheStoredCo "/response/docs/[1]/id=='99'", "/response/docs/[1]/vector==[1.0, 2.0, 3.0, 4.0]"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/dummy-1"); } @Test @@ -201,7 +201,7 @@ public void processAtomicUpdate_shouldReplaceExistingVectorNotAppend() throws Ex // is // modified using an atomic update, the vector is recomputed and replaces the previous one. It // ensures that the system does not append or merge vector values. - loadModel("dummy-model.json"); + loadTextToVectorModel("dummy-model.json"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "textToVectorStoredInputField"); @@ -228,7 +228,7 @@ public void processAtomicUpdate_shouldReplaceExistingVectorNotAppend() throws Ex "/response/docs/[1]/id=='98'", "/response/docs/[1]/vector==[1.0, 2.0, 3.0, 4.0]"); - restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); + restTestHarness.delete(TextToVectorModelStore.REST_END_POINT + "/dummy-1"); } void addWithChain(SolrInputDocument document, String updateChain) diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactoryTest.java new file mode 100644 index 000000000000..d99b7ea6e877 --- /dev/null +++ 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -0,0 +1,450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.update.processor.factory; + +import dev.langchain4j.model.chat.request.ResponseFormatType; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.apache.solr.languagemodels.model.SolrLargeLanguageModel; +import org.apache.solr.languagemodels.store.rest.LargeLanguageModelStore; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DocumentEnrichmentUpdateProcessorFactoryTest extends TestLanguageModelBase { + + @BeforeClass + public static void init() throws Exception { + 
setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + } + + @AfterClass + public static void cleanup() throws Exception { + afterTest(); + } + + SolrCore collection1; + + @Before + public void setup() { + collection1 = solrTestRule.getCoreContainer().getCore("collection1"); + } + + @After + public void after() { + collection1.close(); + } + + @Test + public void init_fullArgs_shouldInitAllParams() { + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory(List.of("string_field"), "enriched_field", null, "model1"); + + assertEquals(List.of("string_field"), factory.getInputFields()); + assertEquals("enriched_field", factory.getOutputField()); + assertEquals("Summarize: {string_field}.", factory.getPrompt()); + assertEquals("model1", factory.getModelName()); + } + + @Test + public void init_multipleInputFields_shouldInitAllFields() { + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field", "body_field"), "enriched_field", null, "model1"); + + assertEquals(List.of("string_field", "body_field"), factory.getInputFields()); + } + + @Test + public void init_arrInputField_shouldInitAllFields() { + NamedList args = new NamedList<>(); + args.add("inputField", new ArrayList<>(List.of("string_field", "body_field"))); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}. 
Body: {body_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + assertEquals(List.of("string_field", "body_field"), factory.getInputFields()); + } + + // when exception are thrown at init time, the helper function cannot be used + @Test + public void init_noInputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {string_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("At least one 'inputField' must be provided", e.getMessage()); + } + + @Test + public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("prompt", "Summarize: {string_field}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Exactly one 'outputField' must be provided", e.getMessage()); + } + + @Test + public void init_moreThanOneOutputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("outputField", "body_field"); + args.add("prompt", "Summarize: {string_field}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals( + "Only one 'outputField' can be provided, but found: 
[enriched_field, body_field]", + e.getMessage()); + } + + @Test + public void init_neitherPromptNorPromptFile_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Either 'prompt' or 'promptFile' must be provided", e.getMessage()); + } + + @Test + public void init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {string_field}"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Only one of 'prompt' or 'promptFile' can be provided, not both", e.getMessage()); + } + + @Test + public void + init_promptMissingPlaceholderForDeclaredField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize:"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals( + "prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); + } + + @Test + public void + init_promptMissingOnePlaceholderOfMultipleFields_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + 
args.add("inputField", "string_field"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("prompt is missing placeholders for inputField(s): [body_field]", e.getMessage()); + } + + @Test + public void + init_promptHasExtraPlaceholderNotDeclaredAsInputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}. Extra: {unknown_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals( + "prompt contains placeholders not declared as inputField(s): [unknown_field]", + e.getMessage()); + } + + @Test + public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {string_field}"); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Missing required parameter: model", e.getMessage()); + } + + @Test + public void init_promptFile_shouldLoadPromptFromFile() { + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field"), "enriched_field", "prompt.txt", "model1"); + factory.inform(collection1); + + assertEquals("prompt.txt", factory.getPromptFile()); + 
assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{string_field}")); + } + + @Test + public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() { + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field", "body_field"), + "enriched_field", + "prompt-multi-field.txt", + "model1"); + factory.inform(collection1); + + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{string_field}")); + assertTrue(factory.getPrompt().contains("{body_field}")); + } + + @Test + public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform() { + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory( + List.of("string_field"), "enriched_field", "prompt-no-placeholder.txt", "model1"); + + SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); + assertEquals( + "prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); + } + + /* Following tests depend on a real solr schema and depend on BeforeClass-AfterClass methods */ + + @Test + public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() + throws Exception { + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("string_field"), "notExistentOutput", null, collection1, "model1")); + assertEquals("undefined field: \"notExistentOutput\"", e.getMessage()); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/model1"); + } + + @Test + public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() + throws Exception { + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("notExistentInput"), "enriched_field", null, collection1, "model1")); + assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); + 
restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/model1"); + } + + @Test + public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() + throws Exception { + SolrException e = + assertThrows( + SolrException.class, + () -> + createUpdateProcessor( + List.of("string_field", "notExistentInput"), + "enriched_field_multi", + null, + collection1, + "model1")); + assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/model1"); + } + + @Test + public void init_multivaluedStringOutputField_shouldNotThrowException() throws Exception { + UpdateRequestProcessor instance = + createUpdateProcessor( + List.of("string_field"), "enriched_field_multi", null, collection1, "model1"); + assertNotNull(instance); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/model1"); + } + + /* getJsonSchema tests for unsupported field types from the Solr documentation: + * - BinaryField: not supported + * - UUIDField, NestPathField, DenseVectorField: explicitly removed support for these fields since the Java classes + * extend some supported field types + * */ + + @Test + public void getJsonSchema_unsupportedFieldTypes_shouldThrowUnsupportedFieldTypeException() { + var cases = + Map.of( + "output_binary", "BinaryField", + "output_uuid", "UUIDField", + "output_nest_path", "NestPathField", + "vector", "DenseVectorField"); + var schema = collection1.getLatestSchema(); + cases.forEach( + (fieldName, expectedTypeName) -> { + var schemaField = schema.getField(fieldName); + SolrException e = + assertThrows( + SolrException.class, + () -> DocumentEnrichmentUpdateProcessorFactory.getJsonSchema(schemaField)); + assertEquals( + "field type is not supported by Document Enrichment: " + expectedTypeName, + e.getMessage()); + }); + } + + @Test + public void init_sortableTextOutputField_getJsonSchema_shouldProduceStringSchema() { + var schemaField = 
collection1.getLatestSchema().getField("output_sortable_text"); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.getJsonSchema(schemaField); + assertNotNull(responseFormat); + assertEquals(ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + + @Test + public void init_multivaluedStringOutputField_getJsonSchema_shouldProduceArraySchema() { + // verify the ResponseFormat is constructed correctly for the multivalued field + var schema = collection1.getLatestSchema(); + var schemaField = schema.getField("enriched_field_multi"); + assertTrue(schemaField.multiValued()); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.getJsonSchema(schemaField); + assertNotNull(responseFormat); + assertEquals(ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + + @Test + public void init_singleValuedStringOutputField_getJsonSchema_shouldProduceStringSchema() { + var schema = collection1.getLatestSchema(); + var schemaField = schema.getField("enriched_field"); + assertFalse(schemaField.multiValued()); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.getJsonSchema(schemaField); + assertNotNull(responseFormat); + assertEquals(ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + + @Test + public void init_dynamicInputField_shouldNotThrowException() throws Exception { + UpdateRequestProcessor instance = + createUpdateProcessor(List.of("text_s"), "enriched_field", null, collection1, "model1"); + assertNotNull(instance); + restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/model1"); + } + + @Test + public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception { + UpdateRequestProcessor instance = + createUpdateProcessor( + List.of("text_s", "body_field"), "enriched_field", null, collection1, "model1"); + assertNotNull(instance); + 
restTestHarness.delete(LargeLanguageModelStore.REST_END_POINT + "/model1"); + } + + private UpdateRequestProcessor createUpdateProcessor( + List inputFieldNames, + String outputFieldName, + String prompt, + SolrCore core, + String modelName) + throws Exception { + + LargeLanguageModelStore.getManagedModelStore(core) + .addModel(new SolrLargeLanguageModel(modelName, null, null)); + + DocumentEnrichmentUpdateProcessorFactory factory = + initializeUpdateProcessorFactory(inputFieldNames, outputFieldName, prompt, modelName); + + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}; + + return factory.getInstance(req, null, null); + } + + private DocumentEnrichmentUpdateProcessorFactory initializeUpdateProcessorFactory( + List inputFieldNames, String outputFieldName, String prompt, String modelName) { + NamedList args = new NamedList<>(); + + for (String fieldName : inputFieldNames) { + args.add("inputField", fieldName); + } + args.add("outputField", outputFieldName); + + if (prompt != null) { + args.add("promptFile", prompt); + } else { + args.add("prompt", "Summarize: {" + String.join("}. 
{", inputFieldNames) + "}."); + } + + args.add("model", modelName); + + DocumentEnrichmentUpdateProcessorFactory factory = + new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + return factory; + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java index 02617c646364..4c511aa9cc82 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java @@ -22,7 +22,7 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.languagemodels.TestLanguageModelBase; import org.apache.solr.languagemodels.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.TextToVectorModelStore; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.junit.After; @@ -187,7 +187,7 @@ private UpdateRequestProcessor createUpdateProcessor( String inputFieldName, String outputFieldName, SolrCore collection1, String modelName) { NamedList args = new NamedList<>(); - ManagedTextToVectorModelStore.getManagedModelStore(collection1) + TextToVectorModelStore.getManagedModelStore(collection1) .addModel(new SolrTextToVectorModel(modelName, null, null)); args.add("inputField", inputFieldName); args.add("outputField", outputFieldName); diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc index 
e18632462674..84a65feac0d3 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc @@ -427,6 +427,10 @@ The {solr-javadocs}/modules/language-models/index.html[`language-models`] module It uses external text to vectors language models to perform the vectorisation for each processed document. For more information: xref:query-guide:text-to-vector.adoc[Update Request Processor] +{solr-javadocs}/modules/language-models/org/apache/solr/languagemodels/update/processor/factory/DocumentEnrichmentUpdateProcessorFactory.html[DocumentEnrichmentUpdateProcessorFactory]:: Update processor that takes one or more fields and a given prompt in input and returns the output of an LLM as the value of a new field. +It uses external Large Language Model services to perform the enrichment of each processed document. +For more information: xref:indexing-guide:document-enrichment-with-llms.adoc[Document Enrichment documentation] + The {solr-javadocs}/modules/langid/index.html[`langid`] module provides:: {solr-javadocs}/modules/langid/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.html[LangDetectLanguageIdentifierUpdateProcessorFactory]::: Identifies the language of a set of input fields using http://code.google.com/p/language-detection. 
diff --git a/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc b/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc index 9b50849716c3..940225e8d4ef 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc @@ -58,5 +58,6 @@ ** xref:partial-document-updates.adoc[] ** xref:reindexing.adoc[] ** xref:language-detection.adoc[] +** xref:document-enrichment-with-llms.adoc[] ** xref:de-duplication.adoc[] ** xref:content-streams.adoc[] diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc new file mode 100644 index 000000000000..600268187aee --- /dev/null +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc @@ -0,0 +1,534 @@ += Document Enrichment with LLMs +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +This module brings the power of *Large Language Models* to Solr. + +More specifically, it enables calling an LLM at indexing time to enrich documents with additional/generated/extracted +data. 
Given a prompt and a set of input fields, for each document, the LLM is invoked through +https://github.com/langchain4j/langchain4j[LangChain4j], and the result is stored in an output field, which can support +multiple types and may also be multivalued. + +_Without_ this module, the LLM calls to enrich documents must be done _outside_ Solr, before indexing. + +[IMPORTANT] +==== +This module sends your documents off to some hosted service on the internet. +There are cost, privacy, performance, and service availability implications on such a strong dependency that should be +diligently examined before employing this module in a serious way. + +==== + +At the moment, Solr supports a subset of the LLM providers available in LangChain4j. + +*Disclaimer*: Apache Solr is *in no way* affiliated to any of these corporations or services. + +If you want to add support for additional services or improve the support for the existing ones, feel free to +contribute: + +* https://github.com/apache/solr/blob/main/CONTRIBUTING.md[Contributing to Solr] + +== Module + +This is provided via the `language-models` xref:configuration-guide:solr-modules.adoc[Solr Module] that needs to be +enabled before use. + +== Language Model Configuration + +Language Models is a module and therefore its plugins must be configured in `solrconfig.xml`. + +=== Minimum Requirements + +* Enable the `language-models` module to make the Language Models classes available on Solr's classpath. +See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details. + +* An {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[UpdateRequestProcessorChain] +that includes at least one `DocumentEnrichmentUpdateProcessor` update processor. 
+ +=== Update Processor Chain Design + +To properly design the Update Processor Chain for Document Enrichment, several parameters must be defined: + +`inputField`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The field whose content is passed to the LLM to enrich the documents. Every `inputField` declared must be referred to in +the prompt. + ++ +Multiple `inputField` are supported and can be defined by using one of the following notations: + +* Add more than one `inputField` string element ++ +[source,xml] +---- + + + title + body + summary + Summarize with the following information. Title: {title}. Body: {body}. + model-name + + + +---- + +* Substitute the `inputField` string element with an array of string elements with the same name ++ +[source,xml] +---- + + title + body + +---- + + +`outputField`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The LLM response is mapped to the specified `outputField`, and only one field is supported as output. Note that this +module only supports a subset of Solr's available field types, which includes: + +* *String/Text*: `StrField`, `TextField`, `SortableTextField` +* *Date*: `DatePointField` (the LLM must return an ISO-8601 date string; it might be useful to tune your prompt accordingly, to avoid indexing errors) +* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField` +* *Boolean*: `BoolField` + + +These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses. + + +`prompt` or `promptFile`:: ++ +[%autowidth,frame=none] +|=== +s|Exactly one of these parameters is required |Default: none +|=== ++ +Two different ways to define a prompt are available: one directly in the solrconfig and one through a dedicated file. 
+Either way, the content of the prompt _must_ contain a special token for each `inputField` declared, that are the +`fieldName` surrounded by curly brackets (e.g., `{string_field}`, in the example below). Solr will throw an error if +the parameters are not properly defined. ++ +These parameters can be defined in one of the following ways: + +* Update processor definition with the `prompt` parameter ++ +[source,xml] +---- + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- + +* Update processor definition with the `promptFile` parameter: in this case, the file `prompt.txt` must be uploaded to +Solr inside the config folder of the collection (e.g., similarly to `solrconfig.xml`, `synonyms.txt`, etc.) ++ +[source,xml] +---- + + + string_field + summary + prompt.txt + model-name + + + +---- + +`model`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ + +The name of the model that will be uploaded via REST. See xref:document-enrichment-with-llms.adoc#general-purpose-llm-setup[] for +more information. + + +For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page: +xref:configuration-guide:update-request-processors.adoc[Update Request Processor] + +[IMPORTANT] +==== +This update processor sends your document field content off to some hosted service on the internet. +There are serious performance implications that should be diligently examined before employing this component in production. +It will slow down substantially your indexing pipeline so make sure to stress test your solution before going live. + +==== + +[NOTE] +==== +If any `inputField` value is absent or empty for a given document, enrichment is silently skipped for that document: +the `outputField` is not added and the document is indexed as-is. 
+ +If the LLM call fails at runtime (e.g., network error, model timeout), the exception is caught and logged but is +*non-fatal*: the document is still indexed without the `outputField`. +Monitor your indexing logs to detect documents that were not enriched as expected. +==== + +== General Purpose LLM Setup + +=== Models + +* A model is a https://docs.langchain4j.dev/category/language-models/[Langchain4j ChatModel] that generates a response given a prompt. +* A model is a reference to an external API that runs the Large Language Model. + +[IMPORTANT] +==== +The Solr model specifies the parameters to access the APIs, the LLM doesn't run internally in Solr. + +==== + +A model is described by these parameters: + + +`class`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The model https://github.com/langchain4j/langchain4j[LangChain4j] implementation. +Accepted values: + +* `dev.langchain4j.model.ollama.OllamaChatModel` +* `dev.langchain4j.model.mistralai.MistralAiChatModel` +* `dev.langchain4j.model.anthropic.AnthropicChatModel` +* `dev.langchain4j.model.openai.OpenAiChatModel` +* `dev.langchain4j.model.googleai.GoogleAiGeminiChatModel` + +`name`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The identifier of your model, this is used by any component that intends to use the model +(e.g., `DocumentEnrichmentUpdateProcessorFactory` update processor). + +`params`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Each model class has potentially different params. +Many are shared but for the full set of parameters of the model you are interested in please refer to the official +documentation of the LangChain4j version included in Solr: +https://docs.langchain4j.dev/category/language-models[Chat Models in LangChain4j]. + +=== Supported Models +Apache Solr uses https://github.com/langchain4j/langchain4j[LangChain4j] to support document enrichment with LLMs. 
+The models currently supported are: + +[tabs#supported-chat-models] +====== +Ollama:: ++ +==== + +[source,json] +---- +{ + "class": "dev.langchain4j.model.ollama.OllamaChatModel", + "name": "", + "params": { + "baseUrl": "http://localhost:11434", + "modelName": "", + "timeout": 300, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +MistralAI:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.mistralai.MistralAiChatModel", + "name": "", + "params": { + "baseUrl": "https://api.mistral.ai/v1", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== +OpenAI:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +Anthropic:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.anthropic.AnthropicChatModel", + "name": "", + "params": { + "baseUrl": "https://api.anthropic.com/v1/", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +Gemini:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.googleai.GoogleAiGeminiChatModel", + "name": "", + "params": { + "baseUrl": "https://generativelanguage.googleapis.com/v1beta/", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== +====== + +=== Uploading a Model + +To upload the model in a `/path/myModel.json` file, please run: + +[source,bash] +---- +curl -XPUT 'http://localhost:8983/solr/YOUR_COLLECTION/schema/large-language-model-store' --data-binary "@/path/myModel.json" -H 'Content-type:application/json' +---- + +To delete the `currentModel` model: + 
+[source,bash] +---- +curl -XDELETE 'http://localhost:8983/solr/YOUR_COLLECTION/schema/large-language-model-store/currentModel' +---- + +To view all models: + +[source,text] +http://localhost:8983/solr/YOUR_COLLECTION/schema/large-language-model-store + + +.Example: /path/myOpenAIModel.json +[source,json] +---- +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "openai-1", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "apiKey-openAI", + "modelName": "gpt-5.4-nano", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- + + +== Index First and Enrich your Documents on a Second Pass +LLM calls are typically slow, so depending on your use case, it may be preferable to first index your documents and +enrich them with LLM-generated fields at a later stage. + +This can be done in Solr by defining two update request processor chains: one that includes all the processors you need, +excluding the `DocumentEnrichmentUpdateProcessor` (let's call it 'no-enrichment') and one that includes the +`DocumentEnrichmentUpdateProcessor` (let's call it 'enrichment'). + +[source,xml] +---- + + + ... + + ... + + ... + + + +---- + +[source,xml] +---- + + + ... + + ... + + ... + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- + +You would index your documents first using the 'no-enrichment' chain and, when finished, incrementally repeat the indexing +targeting the 'enrichment' chain. + +[IMPORTANT] +==== +This implies you need to send the documents you want to index to Solr twice and re-run any other update request +processor you need, in the second chain. This has data traffic implications (you transfer your documents over the +network twice) and processing implications (if you have other update request processors in your chain, those must be +repeated the second time as we are literally replacing the indexed documents one by one). 
+==== + +If your use case is compatible with xref:indexing-guide:partial-document-updates.adoc[Partial Updates], you can do better: + +You still define two chains, but this time the 'enrichment' one only includes the 'DocumentEnrichmentUpdateProcessor' +(and the xref:configuration-guide:update-request-processors.adoc[Mandatory Processors]) + +[source,xml] +---- + + + ... + + ... + + ... + + + +---- + +[source,xml] +---- + + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- + +[NOTE] +==== +Since partial updates are resolved by `DistributedUpdateProcessorFactory`, be sure to place +`DocumentEnrichmentUpdateProcessorFactory` afterwards so that it sees normal/complete documents. +==== + +Add to your schema a simple field that will be useful to track the enrichment process and use atomic updates: + +[source,xml] +---- + + +---- + +In the first pass just index your documents using your reliable and fast 'no-enrichment' chain. + +On the second pass, re-index all your documents using atomic updates and targeting the 'enrichment' chain: + +[source,json] +---- +{ + "id":"mydoc", + "enriched": { + "set": true + } +} +---- + +What will happen is that internally Solr fetches the stored content of the docs to update, all the existing fields are +retrieved and a re-indexing happens, targeting the 'enrichment' chain that will add the LLM-generated fields and set the +boolean `enriched` field to `true`. + +Faceting or querying on the boolean `enriched` field can also give you a quick idea on how many documents have been +enriched with the new generated fields. + +[NOTE] +==== +To gain information about several ways to target a different `updateRequestProcessorChain` from the default one, see the +section related to xref:configuration-guide:update-request-processors.adoc#using-custom-chains[Using Custom Chains]. 
+==== diff --git a/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc b/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc index 7e3843230cf7..5e04ac000859 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc @@ -292,8 +292,6 @@ http://localhost:8983/solr/techproducts/schema/text-to-vector-model-store ---- -Documentation Indexing time - === Enriching documents with vectors at indexing time To vectorise textual fields of your documents at indexing time you need to configure an {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[Update Request Processor Chain] that includes at least one `TextToVectorUpdateProcessor` update request processor (you can include more than one, if you want to vectorise multiple fields):