mirror of https://github.com/amithkoujalgi/ollama4j.git (synced 2025-09-16 03:39:05 +02:00)
Refactor OllamaAPI for improved async handling and response streaming
Updated OllamaAPI to support separate thinking and response stream handlers, enhancing the asynchronous generation of responses. Adjusted related models and observers to accommodate the new streaming logic. Improved the handling of response data in OllamaResult and OllamaGenerateResponseModel, adding new properties for better tracking of response metrics. Refined integration tests to reflect the changed method signatures and to ensure proper logging of streamed responses.
parent 5f5fa8ecae
commit c754bd11da
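
For orientation, here is a minimal sketch of how the reworked synchronous API might be called after this change. It is illustrative only: the model names and OptionsBuilder usage are assumptions, and OllamaStreamHandler is assumed to be a plain String consumer, as the observer changes below suggest.

    OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434");
    Options options = new OptionsBuilder().build();

    // Non-thinking overload: a single response handler; think is forced to false internally.
    OllamaResult result = ollamaAPI.generate("llama3", "Why is the sky blue?", false, options,
            responseToken -> System.out.print(responseToken));

    // Thinking overload: separate handlers for thinking tokens and response tokens.
    OllamaResult thinkingResult = ollamaAPI.generate("deepseek-r1:7b", "Why is the sky blue?", false, options,
            thinkingToken -> System.out.print(thinkingToken),
            responseToken -> System.out.print(responseToken));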
OllamaAPI.java
@@ -52,7 +52,7 @@ import java.util.stream.Collectors;
 /**
  * The base Ollama API class.
  */
-@SuppressWarnings({"DuplicatedCode", "resource"})
+@SuppressWarnings({ "DuplicatedCode", "resource" })
 public class OllamaAPI {
 
     private static final Logger logger = LoggerFactory.getLogger(OllamaAPI.class);
@@ -101,7 +101,7 @@ public class OllamaAPI {
      * Default is 0 (no retries).
      */
     @Setter
-    @SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
+    @SuppressWarnings({ "FieldMayBeFinal", "FieldCanBeLocal" })
     private int numberOfRetriesForModelPull = 0;
 
     /**
@@ -244,7 +244,7 @@ public class OllamaAPI {
      * tags, tag count, and the time when model was updated.
      *
      * @return A list of {@link LibraryModel} objects representing the models
      * available in the Ollama library.
      * @throws OllamaBaseException If the HTTP request fails or the response is not
      * successful (non-200 status code).
      * @throws IOException If an I/O error occurs during the HTTP request
@@ -312,7 +312,7 @@ public class OllamaAPI {
      * of the library model
      * for which the tags need to be fetched.
      * @return a list of {@link LibraryModelTag} objects containing the extracted
      * tags and their associated metadata.
      * @throws OllamaBaseException if the HTTP response status code indicates an
      * error (i.e., not 200 OK),
      * or if there is any other issue during the
@@ -371,9 +371,12 @@ public class OllamaAPI {
     /**
      * Finds a specific model using model name and tag from Ollama library.
      * <p>
-     * <b>Deprecated:</b> This method relies on the HTML structure of the Ollama website,
-     * which is subject to change at any time. As a result, it is difficult to keep this API
-     * method consistently updated and reliable. Therefore, this method is deprecated and
+     * <b>Deprecated:</b> This method relies on the HTML structure of the Ollama
+     * website,
+     * which is subject to change at any time. As a result, it is difficult to keep
+     * this API
+     * method consistently updated and reliable. Therefore, this method is
+     * deprecated and
      * may be removed in future releases.
      * <p>
      * This method retrieves the model from the Ollama library by its name, then
@@ -386,14 +389,16 @@ public class OllamaAPI {
      * @param modelName The name of the model to search for in the library.
      * @param tag The tag name to search for within the specified model.
      * @return The {@link LibraryModelTag} associated with the specified model and
      * tag.
      * @throws OllamaBaseException If there is a problem with the Ollama library
      * operations.
      * @throws IOException If an I/O error occurs during the operation.
      * @throws URISyntaxException If there is an error with the URI syntax.
      * @throws InterruptedException If the operation is interrupted.
      * @throws NoSuchElementException If the model or the tag is not found.
-     * @deprecated This method relies on the HTML structure of the Ollama website, which can change at any time and break this API. It is deprecated and may be removed in the future.
+     * @deprecated This method relies on the HTML structure of the Ollama website,
+     * which can change at any time and break this API. It is deprecated
+     * and may be removed in the future.
      */
     @Deprecated
     public LibraryModelTag findModelTagFromLibrary(String modelName, String tag)
@@ -453,12 +458,13 @@ public class OllamaAPI {
     /**
      * Handles retry backoff for pullModel.
      */
-    private void handlePullRetry(String modelName, int currentRetry, int maxRetries, long baseDelayMillis) throws InterruptedException {
+    private void handlePullRetry(String modelName, int currentRetry, int maxRetries, long baseDelayMillis)
+            throws InterruptedException {
         int attempt = currentRetry + 1;
         if (attempt < maxRetries) {
             long backoffMillis = baseDelayMillis * (1L << currentRetry);
             logger.error("Failed to pull model {}, retrying in {}s... (attempt {}/{})",
-                    modelName, backoffMillis/1000, attempt, maxRetries);
+                    modelName, backoffMillis / 1000, attempt, maxRetries);
             try {
                 Thread.sleep(backoffMillis);
             } catch (InterruptedException ie) {
@@ -470,7 +476,6 @@ public class OllamaAPI {
         }
     }
 
-
     private void doPullModel(String modelName)
             throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
         String url = this.host + "/api/pull";
@@ -825,36 +830,74 @@ public class OllamaAPI {
 
     /**
      * Generate response for a question to a model running on Ollama server. This is
-     * a sync/blocking
-     * call.
+     * a sync/blocking call. This API does not support "thinking" models.
      *
      * @param model the ollama model to ask the question to
      * @param prompt the prompt/question text
-     * @param raw if true no formatting will be applied to the prompt. You
-     * may choose to use the raw parameter if you are
-     * specifying a full templated prompt in your request to
-     * the API
-     * @param think if true the model will "think" step-by-step before
-     * generating the final response
+     * @param raw if true no formatting will be applied to the
+     * prompt. You
+     * may choose to use the raw parameter if you are
+     * specifying a full templated prompt in your
+     * request to
+     * the API
      * @param options the Options object - <a
      * href=
      * "https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">More
      * details on the options</a>
-     * @param streamHandler optional callback consumer that will be applied every
-     * time a streamed response is received. If not set, the
-     * stream parameter of the request is set to false.
+     * @param responseStreamHandler optional callback consumer that will be applied
+     * every
+     * time a streamed response is received. If not
+     * set, the
+     * stream parameter of the request is set to false.
      * @return OllamaResult that includes response text and time taken for response
      * @throws OllamaBaseException if the response indicates an error status
      * @throws IOException if an I/O error occurs during the HTTP request
      * @throws InterruptedException if the operation is interrupted
      */
-    public OllamaResult generate(String model, String prompt, boolean raw, boolean think, Options options,
-            OllamaStreamHandler streamHandler) throws OllamaBaseException, IOException, InterruptedException {
+    public OllamaResult generate(String model, String prompt, boolean raw, Options options,
+            OllamaStreamHandler responseStreamHandler) throws OllamaBaseException, IOException, InterruptedException {
         OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt);
         ollamaRequestModel.setRaw(raw);
-        ollamaRequestModel.setThink(think);
+        ollamaRequestModel.setThink(false);
         ollamaRequestModel.setOptions(options.getOptionsMap());
-        return generateSyncForOllamaRequestModel(ollamaRequestModel, streamHandler);
+        return generateSyncForOllamaRequestModel(ollamaRequestModel, null, responseStreamHandler);
+    }
+
+    /**
+     * Generate thinking and response tokens for a question to a thinking model
+     * running on Ollama server. This is
+     * a sync/blocking call.
+     *
+     * @param model the ollama model to ask the question to
+     * @param prompt the prompt/question text
+     * @param raw if true no formatting will be applied to the
+     * prompt. You
+     * may choose to use the raw parameter if you are
+     * specifying a full templated prompt in your
+     * request to
+     * the API
+     * @param options the Options object - <a
+     * href=
+     * "https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">More
+     * details on the options</a>
+     * @param responseStreamHandler optional callback consumer that will be applied
+     * every
+     * time a streamed response is received. If not
+     * set, the
+     * stream parameter of the request is set to false.
+     * @return OllamaResult that includes response text and time taken for response
+     * @throws OllamaBaseException if the response indicates an error status
+     * @throws IOException if an I/O error occurs during the HTTP request
+     * @throws InterruptedException if the operation is interrupted
+     */
+    public OllamaResult generate(String model, String prompt, boolean raw, Options options,
+            OllamaStreamHandler thinkingStreamHandler, OllamaStreamHandler responseStreamHandler)
+            throws OllamaBaseException, IOException, InterruptedException {
+        OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt);
+        ollamaRequestModel.setRaw(raw);
+        ollamaRequestModel.setThink(true);
+        ollamaRequestModel.setOptions(options.getOptionsMap());
+        return generateSyncForOllamaRequestModel(ollamaRequestModel, thinkingStreamHandler, responseStreamHandler);
     }
 
     /**
@@ -862,7 +905,7 @@ public class OllamaAPI {
      * mode).
      * <p>
      * Uses
-     * {@link #generate(String, String, boolean, boolean, Options, OllamaStreamHandler)}
+     * {@link #generate(String, String, boolean, Options, OllamaStreamHandler)}
      *
      * @param model The name or identifier of the AI model to use for generating
      * the response.
@@ -871,10 +914,10 @@ public class OllamaAPI {
      * and provide a full prompt. In this case, you can use the raw
      * parameter to disable templating. Also note that raw mode will
      * not return a context.
-     * @param think If set to true, the model will "think" step-by-step before
-     * generating the final response.
      * @param options Additional options or configurations to use when generating
      * the response.
+     * @param think if true the model will "think" step-by-step before
+     * generating the final response
      * @return {@link OllamaResult}
      * @throws OllamaBaseException if the response indicates an error status
      * @throws IOException if an I/O error occurs during the HTTP request
@@ -882,7 +925,11 @@ public class OllamaAPI {
      */
     public OllamaResult generate(String model, String prompt, boolean raw, boolean think, Options options)
             throws OllamaBaseException, IOException, InterruptedException {
-        return generate(model, prompt, raw, think, options, null);
+        if (think) {
+            return generate(model, prompt, raw, options, null, null);
+        } else {
+            return generate(model, prompt, raw, options, null);
+        }
     }
 
     /**
@@ -896,7 +943,7 @@ public class OllamaAPI {
      * @param format A map containing the format specification for the structured
      * output.
      * @return An instance of {@link OllamaResult} containing the structured
      * response.
      * @throws OllamaBaseException if the response indicates an error status.
      * @throws IOException if an I/O error occurs during the HTTP request.
      * @throws InterruptedException if the operation is interrupted.
@@ -958,18 +1005,16 @@ public class OllamaAPI {
      * @param model The name or identifier of the AI model to use for generating
      * the response.
      * @param prompt The input text or prompt to provide to the AI model.
-     * @param think If set to true, the model will "think" step-by-step before
-     * generating the final response.
      * @param options Additional options or configurations to use when generating
      * the response.
      * @return {@link OllamaToolsResult} An OllamaToolsResult object containing the
      * response from the AI model and the results of invoking the tools on
      * that output.
      * @throws OllamaBaseException if the response indicates an error status
      * @throws IOException if an I/O error occurs during the HTTP request
      * @throws InterruptedException if the operation is interrupted
      */
-    public OllamaToolsResult generateWithTools(String model, String prompt, boolean think, Options options)
+    public OllamaToolsResult generateWithTools(String model, String prompt, Options options)
             throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
         boolean raw = true;
         OllamaToolsResult toolResult = new OllamaToolsResult();
@@ -984,7 +1029,7 @@ public class OllamaAPI {
             prompt = promptBuilder.build();
         }
 
-        OllamaResult result = generate(model, prompt, raw, think, options, null);
+        OllamaResult result = generate(model, prompt, raw, options, null);
         toolResult.setModelResult(result);
 
         String toolsResponse = result.getResponse();
@@ -1014,19 +1059,47 @@ public class OllamaAPI {
     }
 
     /**
-     * Generate response for a question to a model running on Ollama server and get
-     * a callback handle
-     * that can be used to check for status and get the response from the model
-     * later. This would be
-     * an async/non-blocking call.
+     * Asynchronously generates a response for a prompt using a model running on the
+     * Ollama server.
+     * <p>
+     * This method returns an {@link OllamaAsyncResultStreamer} handle that can be
+     * used to poll for
+     * status and retrieve streamed "thinking" and response tokens from the model.
+     * The call is non-blocking.
+     * </p>
      *
-     * @param model the ollama model to ask the question to
-     * @param prompt the prompt/question text
-     * @return the ollama async result callback handle
+     * <p>
+     * <b>Example usage:</b>
+     * </p>
+     *
+     * <pre>{@code
+     * OllamaAsyncResultStreamer resultStreamer = ollamaAPI.generateAsync("gpt-oss:20b", "Who are you", false, true);
+     * int pollIntervalMilliseconds = 1000;
+     * while (true) {
+     *     String thinkingTokens = resultStreamer.getThinkingResponseStream().poll();
+     *     String responseTokens = resultStreamer.getResponseStream().poll();
+     *     System.out.print(thinkingTokens != null ? thinkingTokens.toUpperCase() : "");
+     *     System.out.print(responseTokens != null ? responseTokens.toLowerCase() : "");
+     *     Thread.sleep(pollIntervalMilliseconds);
+     *     if (!resultStreamer.isAlive())
+     *         break;
+     * }
+     * System.out.println("Complete thinking response: " + resultStreamer.getCompleteThinkingResponse());
+     * System.out.println("Complete response: " + resultStreamer.getCompleteResponse());
+     * }</pre>
+     *
+     * @param model the Ollama model to use for generating the response
+     * @param prompt the prompt or question text to send to the model
+     * @param raw if {@code true}, returns the raw response from the model
+     * @param think if {@code true}, streams "thinking" tokens as well as response
+     * tokens
+     * @return an {@link OllamaAsyncResultStreamer} handle for polling and
+     * retrieving streamed results
      */
-    public OllamaAsyncResultStreamer generateAsync(String model, String prompt, boolean raw) {
+    public OllamaAsyncResultStreamer generateAsync(String model, String prompt, boolean raw, boolean think) {
         OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt);
         ollamaRequestModel.setRaw(raw);
+        ollamaRequestModel.setThink(think);
         URI uri = URI.create(this.host + "/api/generate");
         OllamaAsyncResultStreamer ollamaAsyncResultStreamer = new OllamaAsyncResultStreamer(
                 getRequestBuilderDefault(uri), ollamaRequestModel, requestTimeoutSeconds);
@@ -1055,14 +1128,14 @@ public class OllamaAPI {
      * @throws InterruptedException if the operation is interrupted
      */
     public OllamaResult generateWithImageFiles(String model, String prompt, List<File> imageFiles, Options options,
             OllamaStreamHandler streamHandler) throws OllamaBaseException, IOException, InterruptedException {
         List<String> images = new ArrayList<>();
         for (File imageFile : imageFiles) {
             images.add(encodeFileToBase64(imageFile));
         }
         OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt, images);
         ollamaRequestModel.setOptions(options.getOptionsMap());
-        return generateSyncForOllamaRequestModel(ollamaRequestModel, streamHandler);
+        return generateSyncForOllamaRequestModel(ollamaRequestModel, null, streamHandler);
     }
 
     /**
@@ -1102,7 +1175,7 @@ public class OllamaAPI {
      * @throws URISyntaxException if the URI for the request is malformed
      */
     public OllamaResult generateWithImageURLs(String model, String prompt, List<String> imageURLs, Options options,
             OllamaStreamHandler streamHandler)
             throws OllamaBaseException, IOException, InterruptedException, URISyntaxException {
         List<String> images = new ArrayList<>();
         for (String imageURL : imageURLs) {
@@ -1110,7 +1183,7 @@ public class OllamaAPI {
         }
         OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt, images);
         ollamaRequestModel.setOptions(options.getOptionsMap());
-        return generateSyncForOllamaRequestModel(ollamaRequestModel, streamHandler);
+        return generateSyncForOllamaRequestModel(ollamaRequestModel, null, streamHandler);
     }
 
     /**
@@ -1144,20 +1217,20 @@ public class OllamaAPI {
      * @param streamHandler optional callback that will be invoked with each
      * streamed response; if null, streaming is disabled
      * @return OllamaResult containing the response text and the time taken for the
      * response
      * @throws OllamaBaseException if the response indicates an error status
      * @throws IOException if an I/O error occurs during the HTTP request
      * @throws InterruptedException if the operation is interrupted
      */
     public OllamaResult generateWithImages(String model, String prompt, List<byte[]> images, Options options,
             OllamaStreamHandler streamHandler) throws OllamaBaseException, IOException, InterruptedException {
         List<String> encodedImages = new ArrayList<>();
         for (byte[] image : images) {
             encodedImages.add(encodeByteArrayToBase64(image));
         }
         OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt, encodedImages);
         ollamaRequestModel.setOptions(options.getOptionsMap());
-        return generateSyncForOllamaRequestModel(ollamaRequestModel, streamHandler);
+        return generateSyncForOllamaRequestModel(ollamaRequestModel, null, streamHandler);
     }
 
     /**
@@ -1184,7 +1257,7 @@ public class OllamaAPI {
      * @param model the ollama model to ask the question to
     * @param messages chat history / message stack to send to the model
      * @return {@link OllamaChatResult} containing the api response and the message
      * history including the newly acquired assistant response.
      * @throws OllamaBaseException any response code than 200 has been returned
      * @throws IOException in case the responseStream can not be read
      * @throws InterruptedException in case the server is not reachable or
@@ -1223,7 +1296,7 @@ public class OllamaAPI {
      */
     public OllamaChatResult chat(OllamaChatRequest request)
             throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
-        return chat(request, null);
+        return chat(request, null, null);
     }
 
     /**
@@ -1232,10 +1305,11 @@ public class OllamaAPI {
      * <p>
      * Hint: the OllamaChatRequestModel#getStream() property is not implemented.
      *
      * @param request request object to be sent to the server
-     * @param streamHandler callback handler to handle the last message from stream
-     * (caution: all previous tokens from stream will be
-     * concatenated)
+     * @param responseStreamHandler callback handler to handle the last message from
+     * stream
+     * @param thinkingStreamHandler callback handler to handle the last thinking
+     * message from stream
      * @return {@link OllamaChatResult}
      * @throws OllamaBaseException any response code than 200 has been returned
      * @throws IOException in case the responseStream can not be read
@@ -1248,9 +1322,10 @@ public class OllamaAPI {
      * @throws InterruptedException if the operation is interrupted
      * @throws ToolInvocationException if the tool invocation fails
      */
-    public OllamaChatResult chat(OllamaChatRequest request, OllamaStreamHandler streamHandler)
+    public OllamaChatResult chat(OllamaChatRequest request, OllamaStreamHandler thinkingStreamHandler,
+            OllamaStreamHandler responseStreamHandler)
             throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
-        return chatStreaming(request, new OllamaChatStreamObserver(streamHandler));
+        return chatStreaming(request, new OllamaChatStreamObserver(thinkingStreamHandler, responseStreamHandler));
     }
 
     /**
@@ -1393,7 +1468,7 @@ public class OllamaAPI {
                 registerAnnotatedTools(provider.getDeclaredConstructor().newInstance());
             }
         } catch (InstantiationException | NoSuchMethodException | IllegalAccessException
                 | InvocationTargetException e) {
             throw new RuntimeException(e);
         }
     }
@@ -1518,10 +1593,12 @@ public class OllamaAPI {
      * the request will be streamed; otherwise, a regular synchronous request will
      * be made.
      *
-     * @param ollamaRequestModel the request model containing necessary parameters
-     * for the Ollama API request.
-     * @param streamHandler the stream handler to process streaming responses,
-     * or null for non-streaming requests.
+     * @param ollamaRequestModel the request model containing necessary
+     * parameters
+     * for the Ollama API request.
+     * @param responseStreamHandler the stream handler to process streaming
+     * responses,
+     * or null for non-streaming requests.
      * @return the result of the Ollama API request.
      * @throws OllamaBaseException if the request fails due to an issue with the
      * Ollama API.
@@ -1530,13 +1607,14 @@ public class OllamaAPI {
      * @throws InterruptedException if the thread is interrupted during the request.
      */
     private OllamaResult generateSyncForOllamaRequestModel(OllamaGenerateRequest ollamaRequestModel,
-            OllamaStreamHandler streamHandler) throws OllamaBaseException, IOException, InterruptedException {
+            OllamaStreamHandler thinkingStreamHandler, OllamaStreamHandler responseStreamHandler)
+            throws OllamaBaseException, IOException, InterruptedException {
         OllamaGenerateEndpointCaller requestCaller = new OllamaGenerateEndpointCaller(host, auth, requestTimeoutSeconds,
                 verbose);
         OllamaResult result;
-        if (streamHandler != null) {
+        if (responseStreamHandler != null) {
             ollamaRequestModel.setStream(true);
-            result = requestCaller.call(ollamaRequestModel, streamHandler);
+            result = requestCaller.call(ollamaRequestModel, thinkingStreamHandler, responseStreamHandler);
         } else {
             result = requestCaller.callSync(ollamaRequestModel);
         }
OllamaChatStreamObserver.java
@@ -6,27 +6,46 @@ import lombok.RequiredArgsConstructor;
 
 @RequiredArgsConstructor
 public class OllamaChatStreamObserver implements OllamaTokenHandler {
-    private final OllamaStreamHandler streamHandler;
+    private final OllamaStreamHandler thinkingStreamHandler;
+    private final OllamaStreamHandler responseStreamHandler;
+
     private String message = "";
 
     @Override
     public void accept(OllamaChatResponseModel token) {
-        if (streamHandler == null || token == null || token.getMessage() == null) {
+        if (responseStreamHandler == null || token == null || token.getMessage() == null) {
             return;
         }
 
-        String content = token.getMessage().getContent();
         String thinking = token.getMessage().getThinking();
+        String content = token.getMessage().getContent();
 
-        boolean hasContent = !content.isEmpty();
         boolean hasThinking = thinking != null && !thinking.isEmpty();
+        boolean hasContent = !content.isEmpty();
 
-        if (hasThinking && !hasContent) {
-            message += thinking;
-        } else {
-            message += content;
+        // if (hasThinking && !hasContent) {
+        //// message += thinking;
+        // message = thinking;
+        // } else {
+        //// message += content;
+        // message = content;
+        // }
+        //
+        // responseStreamHandler.accept(message);
+
+
+        if (!hasContent && hasThinking && thinkingStreamHandler != null) {
+            // message = message + thinking;
+
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
+            thinkingStreamHandler.accept(thinking);
+        } else if (hasContent && responseStreamHandler != null) {
+            // message = message + response;
+
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
+            responseStreamHandler.accept(content);
         }
-
-        streamHandler.accept(message);
     }
 }
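
Note the behavioral shift in the observer above: each handler now receives only the newly streamed token rather than the accumulated message. A sketch of what that implies for callers of the new chat overload (the request builder calls are assumptions based on the ollama4j API; callers accumulate text themselves):

    OllamaChatRequest request = OllamaChatRequestBuilder.getInstance("deepseek-r1:7b")
            .withMessage(OllamaChatMessageRole.USER, "What is 2 + 2?")
            .build();

    StringBuilder thinking = new StringBuilder();
    StringBuilder answer = new StringBuilder();

    // Each handler sees only the new token, so accumulate if the full text is needed.
    OllamaChatResult chatResult = ollamaAPI.chat(request,
            token -> thinking.append(token),
            token -> answer.append(token));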
OllamaGenerateResponseModel.java
@@ -4,6 +4,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 import com.fasterxml.jackson.annotation.JsonProperty;
 
 import java.util.List;
+
 import lombok.Data;
 
 @Data
@@ -14,11 +15,12 @@ public class OllamaGenerateResponseModel {
     private String response;
     private String thinking;
     private boolean done;
+    private @JsonProperty("done_reason") String doneReason;
     private List<Integer> context;
     private @JsonProperty("total_duration") Long totalDuration;
     private @JsonProperty("load_duration") Long loadDuration;
-    private @JsonProperty("prompt_eval_duration") Long promptEvalDuration;
-    private @JsonProperty("eval_duration") Long evalDuration;
     private @JsonProperty("prompt_eval_count") Integer promptEvalCount;
+    private @JsonProperty("prompt_eval_duration") Long promptEvalDuration;
     private @JsonProperty("eval_count") Integer evalCount;
+    private @JsonProperty("eval_duration") Long evalDuration;
 }
OllamaGenerateStreamObserver.java
@@ -5,14 +5,16 @@ import java.util.List;
 
 public class OllamaGenerateStreamObserver {
 
-    private OllamaStreamHandler streamHandler;
+    private final OllamaStreamHandler thinkingStreamHandler;
+    private final OllamaStreamHandler responseStreamHandler;
 
-    private List<OllamaGenerateResponseModel> responseParts = new ArrayList<>();
+    private final List<OllamaGenerateResponseModel> responseParts = new ArrayList<>();
 
     private String message = "";
 
-    public OllamaGenerateStreamObserver(OllamaStreamHandler streamHandler) {
-        this.streamHandler = streamHandler;
+    public OllamaGenerateStreamObserver(OllamaStreamHandler thinkingStreamHandler, OllamaStreamHandler responseStreamHandler) {
+        this.responseStreamHandler = responseStreamHandler;
+        this.thinkingStreamHandler = thinkingStreamHandler;
     }
 
     public void notify(OllamaGenerateResponseModel currentResponsePart) {
@@ -27,11 +29,18 @@ public class OllamaGenerateStreamObserver {
         boolean hasResponse = response != null && !response.isEmpty();
         boolean hasThinking = thinking != null && !thinking.isEmpty();
 
-        if (!hasResponse && hasThinking) {
-            message = message + thinking;
-        } else if (hasResponse) {
-            message = message + response;
+        if (!hasResponse && hasThinking && thinkingStreamHandler != null) {
+            // message = message + thinking;
+
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
+            thinkingStreamHandler.accept(thinking);
+        } else if (hasResponse && responseStreamHandler != null) {
+            // message = message + response;
+
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
+            responseStreamHandler.accept(response);
         }
-        streamHandler.accept(message);
     }
 }
OllamaGenerateEndpointCaller.java
@@ -27,7 +27,7 @@ public class OllamaGenerateEndpointCaller extends OllamaEndpointCaller {
 
     private static final Logger LOG = LoggerFactory.getLogger(OllamaGenerateEndpointCaller.class);
 
-    private OllamaGenerateStreamObserver streamObserver;
+    private OllamaGenerateStreamObserver responseStreamObserver;
 
     public OllamaGenerateEndpointCaller(String host, Auth basicAuth, long requestTimeoutSeconds, boolean verbose) {
         super(host, basicAuth, requestTimeoutSeconds, verbose);
@@ -48,8 +48,8 @@ public class OllamaGenerateEndpointCaller extends OllamaEndpointCaller {
             if (ollamaResponseModel.getThinking() != null) {
                 thinkingBuffer.append(ollamaResponseModel.getThinking());
             }
-            if (streamObserver != null) {
-                streamObserver.notify(ollamaResponseModel);
+            if (responseStreamObserver != null) {
+                responseStreamObserver.notify(ollamaResponseModel);
             }
             return ollamaResponseModel.isDone();
         } catch (JsonProcessingException e) {
@@ -58,9 +58,8 @@ public class OllamaGenerateEndpointCaller extends OllamaEndpointCaller {
         }
     }
 
-    public OllamaResult call(OllamaRequestBody body, OllamaStreamHandler streamHandler)
-            throws OllamaBaseException, IOException, InterruptedException {
-        streamObserver = new OllamaGenerateStreamObserver(streamHandler);
+    public OllamaResult call(OllamaRequestBody body, OllamaStreamHandler thinkingStreamHandler, OllamaStreamHandler responseStreamHandler) throws OllamaBaseException, IOException, InterruptedException {
+        responseStreamObserver = new OllamaGenerateStreamObserver(thinkingStreamHandler, responseStreamHandler);
         return callSync(body);
     }
 
@@ -73,47 +72,41 @@ public class OllamaGenerateEndpointCaller extends OllamaEndpointCaller {
      * @throws IOException in case the responseStream can not be read
      * @throws InterruptedException in case the server is not reachable or network issues happen
      */
+    @SuppressWarnings("DuplicatedCode")
     public OllamaResult callSync(OllamaRequestBody body) throws OllamaBaseException, IOException, InterruptedException {
         // Create Request
         long startTime = System.currentTimeMillis();
         HttpClient httpClient = HttpClient.newHttpClient();
         URI uri = URI.create(getHost() + getEndpointSuffix());
-        HttpRequest.Builder requestBuilder =
-                getRequestBuilderDefault(uri)
-                        .POST(
-                                body.getBodyPublisher());
+        HttpRequest.Builder requestBuilder = getRequestBuilderDefault(uri).POST(body.getBodyPublisher());
         HttpRequest request = requestBuilder.build();
-        if (isVerbose()) LOG.info("Asking model: " + body.toString());
-        HttpResponse<InputStream> response =
-                httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
+        if (isVerbose()) LOG.info("Asking model: {}", body);
+        HttpResponse<InputStream> response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
 
         int statusCode = response.statusCode();
         InputStream responseBodyStream = response.body();
         StringBuilder responseBuffer = new StringBuilder();
         StringBuilder thinkingBuffer = new StringBuilder();
-        try (BufferedReader reader =
-                new BufferedReader(new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8))) {
+        OllamaGenerateResponseModel ollamaGenerateResponseModel = null;
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8))) {
             String line;
             while ((line = reader.readLine()) != null) {
                 if (statusCode == 404) {
                     LOG.warn("Status code: 404 (Not Found)");
-                    OllamaErrorResponse ollamaResponseModel =
-                            Utils.getObjectMapper().readValue(line, OllamaErrorResponse.class);
+                    OllamaErrorResponse ollamaResponseModel = Utils.getObjectMapper().readValue(line, OllamaErrorResponse.class);
                     responseBuffer.append(ollamaResponseModel.getError());
                 } else if (statusCode == 401) {
                     LOG.warn("Status code: 401 (Unauthorized)");
-                    OllamaErrorResponse ollamaResponseModel =
-                            Utils.getObjectMapper()
-                                    .readValue("{\"error\":\"Unauthorized\"}", OllamaErrorResponse.class);
+                    OllamaErrorResponse ollamaResponseModel = Utils.getObjectMapper().readValue("{\"error\":\"Unauthorized\"}", OllamaErrorResponse.class);
                     responseBuffer.append(ollamaResponseModel.getError());
                 } else if (statusCode == 400) {
                     LOG.warn("Status code: 400 (Bad Request)");
-                    OllamaErrorResponse ollamaResponseModel = Utils.getObjectMapper().readValue(line,
-                            OllamaErrorResponse.class);
+                    OllamaErrorResponse ollamaResponseModel = Utils.getObjectMapper().readValue(line, OllamaErrorResponse.class);
                     responseBuffer.append(ollamaResponseModel.getError());
                 } else {
                     boolean finished = parseResponseAndAddToBuffer(line, responseBuffer, thinkingBuffer);
                     if (finished) {
+                        ollamaGenerateResponseModel = Utils.getObjectMapper().readValue(line, OllamaGenerateResponseModel.class);
                         break;
                     }
                 }
@@ -121,13 +114,25 @@ public class OllamaGenerateEndpointCaller extends OllamaEndpointCaller {
             }
         }
 
         if (statusCode != 200) {
-            LOG.error("Status code " + statusCode);
+            LOG.error("Status code: {}", statusCode);
            throw new OllamaBaseException(responseBuffer.toString());
         } else {
             long endTime = System.currentTimeMillis();
-            OllamaResult ollamaResult =
-                    new OllamaResult(responseBuffer.toString(), thinkingBuffer.toString(), endTime - startTime, statusCode);
-            if (isVerbose()) LOG.info("Model response: " + ollamaResult);
+            OllamaResult ollamaResult = new OllamaResult(responseBuffer.toString(), thinkingBuffer.toString(), endTime - startTime, statusCode);
+            ollamaResult.setModel(ollamaGenerateResponseModel.getModel());
+            ollamaResult.setCreatedAt(ollamaGenerateResponseModel.getCreatedAt());
+            ollamaResult.setDone(ollamaGenerateResponseModel.isDone());
+            ollamaResult.setDoneReason(ollamaGenerateResponseModel.getDoneReason());
+            ollamaResult.setContext(ollamaGenerateResponseModel.getContext());
+            ollamaResult.setTotalDuration(ollamaGenerateResponseModel.getTotalDuration());
+            ollamaResult.setLoadDuration(ollamaGenerateResponseModel.getLoadDuration());
+            ollamaResult.setPromptEvalCount(ollamaGenerateResponseModel.getPromptEvalCount());
+            ollamaResult.setPromptEvalDuration(ollamaGenerateResponseModel.getPromptEvalDuration());
+            ollamaResult.setEvalCount(ollamaGenerateResponseModel.getEvalCount());
+            ollamaResult.setEvalDuration(ollamaGenerateResponseModel.getEvalDuration());
+
+            if (isVerbose()) LOG.info("Model response: {}", ollamaResult);
             return ollamaResult;
         }
     }
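
With the endpoint caller now copying the final chunk's metadata onto OllamaResult, callers can read generation metrics directly. A hedged sketch, assuming Lombok generates getters for the new fields and that the duration values are nanoseconds as reported by Ollama:

    OllamaResult result = ollamaAPI.generate("llama3", "Why is the sky blue?", false,
            new OptionsBuilder().build(), null);

    // New metric fields populated from the final streamed response chunk.
    System.out.println("Done reason:         " + result.getDoneReason());
    System.out.println("Prompt eval count:   " + result.getPromptEvalCount());
    System.out.println("Eval count:          " + result.getEvalCount());
    System.out.println("Total duration (ns): " + result.getTotalDuration());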
@ -26,8 +26,10 @@ import java.time.Duration;
|
|||||||
public class OllamaAsyncResultStreamer extends Thread {
|
public class OllamaAsyncResultStreamer extends Thread {
|
||||||
private final HttpRequest.Builder requestBuilder;
|
private final HttpRequest.Builder requestBuilder;
|
||||||
private final OllamaGenerateRequest ollamaRequestModel;
|
private final OllamaGenerateRequest ollamaRequestModel;
|
||||||
private final OllamaResultStream stream = new OllamaResultStream();
|
private final OllamaResultStream thinkingResponseStream = new OllamaResultStream();
|
||||||
|
private final OllamaResultStream responseStream = new OllamaResultStream();
|
||||||
private String completeResponse;
|
private String completeResponse;
|
||||||
|
private String completeThinkingResponse;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -54,14 +56,11 @@ public class OllamaAsyncResultStreamer extends Thread {
|
|||||||
@Getter
|
@Getter
|
||||||
private long responseTime = 0;
|
private long responseTime = 0;
|
||||||
|
|
||||||
public OllamaAsyncResultStreamer(
|
public OllamaAsyncResultStreamer(HttpRequest.Builder requestBuilder, OllamaGenerateRequest ollamaRequestModel, long requestTimeoutSeconds) {
|
||||||
HttpRequest.Builder requestBuilder,
|
|
||||||
OllamaGenerateRequest ollamaRequestModel,
|
|
||||||
long requestTimeoutSeconds) {
|
|
||||||
this.requestBuilder = requestBuilder;
|
this.requestBuilder = requestBuilder;
|
||||||
this.ollamaRequestModel = ollamaRequestModel;
|
this.ollamaRequestModel = ollamaRequestModel;
|
||||||
this.completeResponse = "";
|
this.completeResponse = "";
|
||||||
this.stream.add("");
|
this.responseStream.add("");
|
||||||
this.requestTimeoutSeconds = requestTimeoutSeconds;
|
this.requestTimeoutSeconds = requestTimeoutSeconds;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,16 +70,8 @@ public class OllamaAsyncResultStreamer extends Thread {
|
|||||||
HttpClient httpClient = HttpClient.newHttpClient();
|
HttpClient httpClient = HttpClient.newHttpClient();
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
try {
|
try {
|
||||||
HttpRequest request =
|
HttpRequest request = requestBuilder.POST(HttpRequest.BodyPublishers.ofString(Utils.getObjectMapper().writeValueAsString(ollamaRequestModel))).header(Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, Constants.HttpConstants.APPLICATION_JSON).timeout(Duration.ofSeconds(requestTimeoutSeconds)).build();
|
||||||
requestBuilder
|
HttpResponse<InputStream> response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
|
||||||
.POST(
|
|
||||||
HttpRequest.BodyPublishers.ofString(
|
|
||||||
Utils.getObjectMapper().writeValueAsString(ollamaRequestModel)))
|
|
||||||
.header(Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, Constants.HttpConstants.APPLICATION_JSON)
|
|
||||||
.timeout(Duration.ofSeconds(requestTimeoutSeconds))
|
|
||||||
.build();
|
|
||||||
HttpResponse<InputStream> response =
|
|
||||||
httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
|
|
||||||
int statusCode = response.statusCode();
|
int statusCode = response.statusCode();
|
||||||
this.httpStatusCode = statusCode;
|
this.httpStatusCode = statusCode;
|
||||||
|
|
||||||
@@ -89,25 +80,33 @@ public class OllamaAsyncResultStreamer extends Thread {
         try {
             reader = new BufferedReader(new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8));
             String line;
+            StringBuilder thinkingBuffer = new StringBuilder();
             StringBuilder responseBuffer = new StringBuilder();
             while ((line = reader.readLine()) != null) {
                 if (statusCode == 404) {
-                    OllamaErrorResponse ollamaResponseModel =
-                            Utils.getObjectMapper().readValue(line, OllamaErrorResponse.class);
-                    stream.add(ollamaResponseModel.getError());
+                    OllamaErrorResponse ollamaResponseModel = Utils.getObjectMapper().readValue(line, OllamaErrorResponse.class);
+                    responseStream.add(ollamaResponseModel.getError());
                     responseBuffer.append(ollamaResponseModel.getError());
                 } else {
-                    OllamaGenerateResponseModel ollamaResponseModel =
-                            Utils.getObjectMapper().readValue(line, OllamaGenerateResponseModel.class);
-                    String res = ollamaResponseModel.getResponse();
-                    stream.add(res);
+                    OllamaGenerateResponseModel ollamaResponseModel = Utils.getObjectMapper().readValue(line, OllamaGenerateResponseModel.class);
+                    String thinkingTokens = ollamaResponseModel.getThinking();
+                    String responseTokens = ollamaResponseModel.getResponse();
+                    if (thinkingTokens == null) {
+                        thinkingTokens = "";
+                    }
+                    if (responseTokens == null) {
+                        responseTokens = "";
+                    }
+                    thinkingResponseStream.add(thinkingTokens);
+                    responseStream.add(responseTokens);
                     if (!ollamaResponseModel.isDone()) {
-                        responseBuffer.append(res);
+                        responseBuffer.append(responseTokens);
+                        thinkingBuffer.append(thinkingTokens);
                     }
                 }
             }
 
             this.succeeded = true;
+            this.completeThinkingResponse = thinkingBuffer.toString();
             this.completeResponse = responseBuffer.toString();
             long endTime = System.currentTimeMillis();
             responseTime = endTime - startTime;
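Note: the run() loop above now pushes thinking tokens and response tokens onto separate streams and accumulates them in separate buffers. A minimal consumer sketch, assuming Lombok-generated getters for the streamer fields shown in this diff (getThinkingResponseStream(), getResponseStream() are inferred names) and that the stream type supports Queue-style poll(), as a LinkedList-backed stream would:

    // Sketch: drain both token queues while the streamer thread is alive.
    OllamaAsyncResultStreamer streamer = ollamaAPI.generateAsync(model, prompt, false, true);
    while (streamer.isAlive()) {
        String thinkingToken = streamer.getThinkingResponseStream().poll(); // may be null between tokens
        if (thinkingToken != null) {
            System.out.print(thinkingToken);
        }
        String responseToken = streamer.getResponseStream().poll();
        if (responseToken != null) {
            System.out.print(responseToken);
        }
    }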
@@ -1,15 +1,18 @@
 package io.github.ollama4j.models.response;
 
 import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.type.TypeReference;
 
+import io.github.ollama4j.models.generate.OllamaGenerateResponseModel;
 import lombok.Data;
 import lombok.Getter;
 
 import static io.github.ollama4j.utils.Utils.getObjectMapper;
 
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -21,30 +24,34 @@ import java.util.Map;
 @JsonIgnoreProperties(ignoreUnknown = true)
 public class OllamaResult {
     /**
-     * -- GETTER --
      * Get the completion/response text
-     *
-     * @return String completion/response text
      */
     private final String response;
-    private final String thinking;
 
     /**
-     * -- GETTER --
+     * Get the thinking text (if available)
+     */
+    private final String thinking;
+
+    /**
      * Get the response status code.
-     *
-     * @return int - response status code
      */
     private int httpStatusCode;
 
     /**
-     * -- GETTER --
      * Get the response time in milliseconds.
-     *
-     * @return long - response time in milliseconds
      */
     private long responseTime = 0;
 
+    private String model;
+    private String createdAt;
+    private boolean done;
+    private String doneReason;
+    private List<Integer> context;
+    private Long totalDuration;
+    private Long loadDuration;
+    private Integer promptEvalCount;
+    private Long promptEvalDuration;
+    private Integer evalCount;
+    private Long evalDuration;
+
     public OllamaResult(String response, String thinking, long responseTime, int httpStatusCode) {
         this.response = response;
         this.thinking = thinking;
@@ -60,6 +67,17 @@ public class OllamaResult {
             responseMap.put("thinking", this.thinking);
             responseMap.put("httpStatusCode", this.httpStatusCode);
             responseMap.put("responseTime", this.responseTime);
+            responseMap.put("model", this.model);
+            responseMap.put("createdAt", this.createdAt);
+            responseMap.put("done", this.done);
+            responseMap.put("doneReason", this.doneReason);
+            responseMap.put("context", this.context);
+            responseMap.put("totalDuration", this.totalDuration);
+            responseMap.put("loadDuration", this.loadDuration);
+            responseMap.put("promptEvalCount", this.promptEvalCount);
+            responseMap.put("promptEvalDuration", this.promptEvalDuration);
+            responseMap.put("evalCount", this.evalCount);
+            responseMap.put("evalDuration", this.evalDuration);
             return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(responseMap);
         } catch (JsonProcessingException e) {
             throw new RuntimeException(e);
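Note: the new entries mirror the metric fields Ollama reports on a final generate response. A rough sketch of deriving a tokens-per-second figure from them, assuming Lombok getters for the new properties and nanosecond durations as in the Ollama API:

    // Sketch: compute generation throughput from the metrics now carried on OllamaResult.
    Integer evalCount = result.getEvalCount();
    Long evalDuration = result.getEvalDuration(); // nanoseconds, per the Ollama API
    if (evalCount != null && evalDuration != null && evalDuration > 0) {
        double tokensPerSecond = evalCount / (evalDuration / 1_000_000_000.0);
        System.out.printf("eval rate: %.2f tokens/s%n", tokensPerSecond);
    }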
@@ -202,14 +202,12 @@ class OllamaAPIIntegrationTest {
             throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
         api.pullModel(GENERAL_PURPOSE_MODEL);
         boolean raw = false;
-        boolean thinking = false;
         StringBuffer sb = new StringBuffer();
         OllamaResult result = api.generate(GENERAL_PURPOSE_MODEL,
                 "What is the capital of France? And what's France's connection with Mona Lisa?", raw,
-                thinking, new OptionsBuilder().build(), (s) -> {
-                    String substring = s.substring(sb.toString().length());
-                    LOG.info(substring);
-                    sb.append(substring);
+                new OptionsBuilder().build(), (s) -> {
+                    LOG.info(s);
+                    sb.append(s);
                 });
 
         assertNotNull(result);
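Note: the handler body no longer diffs the callback value against an accumulator, which suggests the stream handler now receives each new token chunk rather than the full response so far. A usage sketch under that reading (the model name is illustrative):

    // Sketch: accumulate streamed token chunks into the final answer.
    StringBuffer sb = new StringBuffer();
    OllamaResult result = api.generate("gemma2", "Why is the sky blue?", false,
            new OptionsBuilder().build(), (token) -> {
                LOG.info(token);
                sb.append(token);
            });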
@@ -355,12 +353,10 @@ class OllamaAPIIntegrationTest {
                 .withKeepAlive("0m").withOptions(new OptionsBuilder().setTemperature(0.9f).build())
                 .build();
 
-        StringBuffer sb = new StringBuffer();
-
         OllamaChatResult chatResult = api.chat(requestModel, (s) -> {
-            String substring = s.substring(sb.toString().length());
-            sb.append(substring);
-            LOG.info(substring);
+            LOG.info(s.toUpperCase());
+        }, (s) -> {
+            LOG.info(s.toLowerCase());
         });
 
         assertNotNull(chatResult, "chatResult should not be null");
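Note: chat now accepts two stream handlers, the first for thinking tokens and the second for response tokens (the test above tells them apart by logging one in upper case and the other in lower case). A sketch routing them to separate buffers, assuming that handler order:

    // Sketch: keep thinking tokens and answer tokens in separate buffers.
    StringBuilder thinking = new StringBuilder();
    StringBuilder answer = new StringBuilder();
    OllamaChatResult chatResult = api.chat(requestModel,
            (thinkingToken) -> thinking.append(thinkingToken),
            (responseToken) -> answer.append(responseToken));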
@@ -468,9 +464,11 @@ class OllamaAPIIntegrationTest {
         StringBuffer sb = new StringBuffer();
 
         OllamaChatResult chatResult = api.chat(requestModel, (s) -> {
-            String substring = s.substring(sb.toString().length());
-            sb.append(substring);
-            LOG.info(substring);
+            LOG.info(s.toUpperCase());
+            sb.append(s);
+        }, (s) -> {
+            LOG.info(s.toLowerCase());
+            sb.append(s);
         });
         assertNotNull(chatResult);
         assertNotNull(chatResult.getResponseModel());
@@ -491,10 +489,13 @@ class OllamaAPIIntegrationTest {
         StringBuffer sb = new StringBuffer();
 
         OllamaChatResult chatResult = api.chat(requestModel, (s) -> {
-            String substring = s.substring(sb.toString().length());
-            sb.append(substring);
-            LOG.info(substring);
+            sb.append(s);
+            LOG.info(s.toUpperCase());
+        }, (s) -> {
+            sb.append(s);
+            LOG.info(s.toLowerCase());
         });
 
         assertNotNull(chatResult);
         assertNotNull(chatResult.getResponseModel());
         assertNotNull(chatResult.getResponseModel().getMessage());
@@ -586,9 +587,8 @@ class OllamaAPIIntegrationTest {
 
         OllamaResult result = api.generateWithImageFiles(VISION_MODEL, "What is in this image?",
                 List.of(imageFile), new OptionsBuilder().build(), (s) -> {
-                    String substring = s.substring(sb.toString().length());
-                    LOG.info(substring);
-                    sb.append(substring);
+                    LOG.info(s);
+                    sb.append(s);
                 });
         assertNotNull(result);
         assertNotNull(result.getResponse());
@@ -603,10 +603,10 @@ class OllamaAPIIntegrationTest {
         api.pullModel(THINKING_TOOL_MODEL);
 
         boolean raw = false;
-        boolean thinking = true;
+        boolean think = true;
 
-        OllamaResult result = api.generate(THINKING_TOOL_MODEL, "Who are you?", raw, thinking,
-                new OptionsBuilder().build(), null);
+        OllamaResult result = api.generate(THINKING_TOOL_MODEL, "Who are you?", raw, think,
+                new OptionsBuilder().build());
         assertNotNull(result);
         assertNotNull(result.getResponse());
         assertFalse(result.getResponse().isEmpty());
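Note: with think = true and no stream handlers, the thinking text should land on the returned result. A sketch, assuming the getter Lombok derives from OllamaResult's thinking field:

    // Sketch: blocking generate with thinking enabled; inspect both parts afterwards.
    OllamaResult result = api.generate(THINKING_TOOL_MODEL, "Who are you?", false, true,
            new OptionsBuilder().build());
    LOG.info("thinking: {}", result.getThinking());
    LOG.info("answer: {}", result.getResponse());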
@@ -621,15 +621,19 @@ class OllamaAPIIntegrationTest {
         api.pullModel(THINKING_TOOL_MODEL);
 
         boolean raw = false;
-        boolean thinking = true;
 
         StringBuffer sb = new StringBuffer();
-        OllamaResult result = api.generate(THINKING_TOOL_MODEL, "Who are you?", raw, thinking,
-                new OptionsBuilder().build(), (s) -> {
-                    String substring = s.substring(sb.toString().length());
-                    sb.append(substring);
-                    LOG.info(substring);
-                });
+        OllamaResult result = api.generate(THINKING_TOOL_MODEL, "Who are you?", raw,
+                new OptionsBuilder().build(),
+                (thinkingToken) -> {
+                    sb.append(thinkingToken);
+                    LOG.info(thinkingToken);
+                },
+                (resToken) -> {
+                    sb.append(resToken);
+                    LOG.info(resToken);
+                }
+        );
         assertNotNull(result);
         assertNotNull(result.getResponse());
         assertFalse(result.getResponse().isEmpty());
@@ -155,7 +155,7 @@ class TestMockedAPIs {
         try {
             when(ollamaAPI.generateWithImageFiles(
                     model, prompt, Collections.emptyList(), new OptionsBuilder().build()))
-                    .thenReturn(new OllamaResult("","", 0, 200));
+                    .thenReturn(new OllamaResult("", "", 0, 200));
             ollamaAPI.generateWithImageFiles(
                     model, prompt, Collections.emptyList(), new OptionsBuilder().build());
             verify(ollamaAPI, times(1))
@@ -174,7 +174,7 @@ class TestMockedAPIs {
         try {
             when(ollamaAPI.generateWithImageURLs(
                     model, prompt, Collections.emptyList(), new OptionsBuilder().build()))
-                    .thenReturn(new OllamaResult("","", 0, 200));
+                    .thenReturn(new OllamaResult("", "", 0, 200));
             ollamaAPI.generateWithImageURLs(
                     model, prompt, Collections.emptyList(), new OptionsBuilder().build());
             verify(ollamaAPI, times(1))
@@ -190,10 +190,10 @@ class TestMockedAPIs {
         OllamaAPI ollamaAPI = Mockito.mock(OllamaAPI.class);
         String model = OllamaModelType.LLAMA2;
         String prompt = "some prompt text";
-        when(ollamaAPI.generateAsync(model, prompt, false))
+        when(ollamaAPI.generateAsync(model, prompt, false, false))
                 .thenReturn(new OllamaAsyncResultStreamer(null, null, 3));
-        ollamaAPI.generateAsync(model, prompt, false);
-        verify(ollamaAPI, times(1)).generateAsync(model, prompt, false);
+        ollamaAPI.generateAsync(model, prompt, false, false);
+        verify(ollamaAPI, times(1)).generateAsync(model, prompt, false, false);
     }
 
     @Test
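Note: generateAsync now takes two booleans; going by the synchronous generate signature, the pair reads as raw followed by think, though only the mock above pins the arity. A sketch of a blocking caller under that assumption, with getCompleteResponse() taken to be the Lombok getter for the completeResponse field:

    // Sketch: fire an async generation (raw=false, think=true) and wait for it to finish.
    OllamaAsyncResultStreamer streamer = ollamaAPI.generateAsync(model, prompt, false, true);
    streamer.join(); // OllamaAsyncResultStreamer extends Thread; declare or catch InterruptedException
    System.out.println(streamer.getCompleteResponse());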