mirror of
https://github.com/amithkoujalgi/ollama4j.git
synced 2025-10-13 17:08:57 +02:00
Refactor MetricsRecorder and OllamaAPI for improved label handling and documentation clarity
This commit is contained in:
parent
827bedb696
commit
d400998fa2
@ -50,7 +50,11 @@ import lombok.Setter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/** The base Ollama API class. */
|
||||
/**
|
||||
* The base Ollama API class for interacting with the Ollama server.
|
||||
* <p>
|
||||
* This class provides methods for model management, chat, embeddings, tool registration, and more.
|
||||
*/
|
||||
@SuppressWarnings({"DuplicatedCode", "resource", "SpellCheckingInspection"})
|
||||
public class OllamaAPI {
|
||||
|
||||
@ -62,31 +66,33 @@ public class OllamaAPI {
|
||||
|
||||
/**
|
||||
* The request timeout in seconds for API calls.
|
||||
*
|
||||
* <p>Default is 10 seconds. This value determines how long the client will wait for a response
|
||||
* <p>
|
||||
* Default is 10 seconds. This value determines how long the client will wait for a response
|
||||
* from the Ollama server before timing out.
|
||||
*/
|
||||
@Setter private long requestTimeoutSeconds = 10;
|
||||
|
||||
/** The read timeout in seconds for image URLs. */
|
||||
@Setter private int imageURLReadTimeoutSeconds = 10;
|
||||
|
||||
/** The connect timeout in seconds for image URLs. */
|
||||
@Setter private int imageURLConnectTimeoutSeconds = 10;
|
||||
|
||||
/**
|
||||
* The maximum number of retries for tool calls during chat interactions.
|
||||
*
|
||||
* <p>This value controls how many times the API will attempt to call a tool in the event of a
|
||||
* <p>
|
||||
* This value controls how many times the API will attempt to call a tool in the event of a
|
||||
* failure. Default is 3.
|
||||
*/
|
||||
@Setter private int maxChatToolCallRetries = 3;
|
||||
|
||||
/**
|
||||
* The number of retries to attempt when pulling a model from the Ollama server.
|
||||
*
|
||||
* <p>If set to 0, no retries will be performed. If greater than 0, the API will retry pulling
|
||||
* <p>
|
||||
* If set to 0, no retries will be performed. If greater than 0, the API will retry pulling
|
||||
* the model up to the specified number of times in case of failure.
|
||||
*
|
||||
* <p>Default is 0 (no retries).
|
||||
* <p>
|
||||
* Default is 0 (no retries).
|
||||
*/
|
||||
@Setter
|
||||
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
|
||||
@ -94,15 +100,14 @@ public class OllamaAPI {
|
||||
|
||||
/**
|
||||
* Enable or disable Prometheus metrics collection.
|
||||
*
|
||||
* <p>When enabled, the API will collect and expose metrics for request counts, durations, model
|
||||
* <p>
|
||||
* When enabled, the API will collect and expose metrics for request counts, durations, model
|
||||
* usage, and other operational statistics. Default is false.
|
||||
*/
|
||||
@Setter private boolean metricsEnabled = false;
|
||||
|
||||
/**
|
||||
* Instantiates the Ollama API with default Ollama host: <a
|
||||
* href="http://localhost:11434">http://localhost:11434</a>
|
||||
* Instantiates the Ollama API with the default Ollama host: {@code http://localhost:11434}
|
||||
*/
|
||||
public OllamaAPI() {
|
||||
this.host = "http://localhost:11434";
|
||||
@ -110,9 +115,9 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the Ollama API with specified Ollama host address.
|
||||
* Instantiates the Ollama API with a specified Ollama host address.
|
||||
*
|
||||
* @param host the host address of Ollama server
|
||||
* @param host the host address of the Ollama server
|
||||
*/
|
||||
public OllamaAPI(String host) {
|
||||
if (host.endsWith("/")) {
|
||||
@ -125,7 +130,7 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set basic authentication for accessing Ollama server that's behind a reverse-proxy/gateway.
|
||||
* Set basic authentication for accessing an Ollama server that's behind a reverse-proxy/gateway.
|
||||
*
|
||||
* @param username the username
|
||||
* @param password the password
|
||||
@ -135,7 +140,7 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set Bearer authentication for accessing Ollama server that's behind a reverse-proxy/gateway.
|
||||
* Set Bearer authentication for accessing an Ollama server that's behind a reverse-proxy/gateway.
|
||||
*
|
||||
* @param bearerToken the Bearer authentication token to provide
|
||||
*/
|
||||
@ -144,13 +149,14 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* API to check the reachability of Ollama server.
|
||||
* Checks the reachability of the Ollama server.
|
||||
*
|
||||
* @return true if the server is reachable, false otherwise.
|
||||
* @return true if the server is reachable, false otherwise
|
||||
* @throws OllamaBaseException if the ping fails
|
||||
*/
|
||||
public boolean ping() throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/tags";
|
||||
String url = "/api/tags";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
@ -158,7 +164,7 @@ public class OllamaAPI {
|
||||
HttpRequest httpRequest;
|
||||
HttpResponse<String> response;
|
||||
httpRequest =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -182,13 +188,11 @@ public class OllamaAPI {
|
||||
* Provides a list of running models and details about each model currently loaded into memory.
|
||||
*
|
||||
* @return ModelsProcessResponse containing details about the running models
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
*/
|
||||
public ModelsProcessResponse ps() throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/ps";
|
||||
String url = "/api/ps";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
@ -196,7 +200,7 @@ public class OllamaAPI {
|
||||
HttpRequest httpRequest = null;
|
||||
try {
|
||||
httpRequest =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -231,19 +235,16 @@ public class OllamaAPI {
|
||||
*
|
||||
* @return a list of models available on the server
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws URISyntaxException if the URI for the request is malformed
|
||||
*/
|
||||
public List<Model> listModels() throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/tags";
|
||||
String url = "/api/tags";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
HttpClient httpClient = HttpClient.newHttpClient();
|
||||
HttpRequest httpRequest =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -271,7 +272,15 @@ public class OllamaAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/** Handles retry backoff for pullModel. */
|
||||
/**
|
||||
* Handles retry backoff for pullModel.
|
||||
*
|
||||
* @param modelName the name of the model being pulled
|
||||
* @param currentRetry the current retry attempt (zero-based)
|
||||
* @param maxRetries the maximum number of retries allowed
|
||||
* @param baseDelayMillis the base delay in milliseconds for exponential backoff
|
||||
* @throws InterruptedException if the thread is interrupted during sleep
|
||||
*/
|
||||
private void handlePullRetry(
|
||||
String modelName, int currentRetry, int maxRetries, long baseDelayMillis)
|
||||
throws InterruptedException {
|
||||
@ -298,15 +307,21 @@ public class OllamaAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal method to pull a model from the Ollama server.
|
||||
*
|
||||
* @param modelName the name of the model to pull
|
||||
* @throws OllamaBaseException if the pull fails
|
||||
*/
|
||||
private void doPullModel(String modelName) throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/pull";
|
||||
String url = "/api/pull";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
String jsonData = new ModelRequest(modelName).toString();
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.POST(HttpRequest.BodyPublishers.ofString(jsonData))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
@ -349,8 +364,13 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a single ModelPullResponse, handling errors and logging status. Returns true if the
|
||||
* response indicates a successful pull.
|
||||
* Processes a single ModelPullResponse, handling errors and logging status.
|
||||
* Returns true if the response indicates a successful pull.
|
||||
*
|
||||
* @param modelPullResponse the response from the model pull
|
||||
* @param modelName the name of the model
|
||||
* @return true if the pull was successful, false otherwise
|
||||
* @throws OllamaBaseException if the response contains an error
|
||||
*/
|
||||
@SuppressWarnings("RedundantIfStatement")
|
||||
private boolean processModelPullResponse(ModelPullResponse modelPullResponse, String modelName)
|
||||
@ -373,15 +393,21 @@ public class OllamaAPI {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Ollama server version.
|
||||
*
|
||||
* @return the version string
|
||||
* @throws OllamaBaseException if the request fails
|
||||
*/
|
||||
public String getVersion() throws OllamaBaseException {
|
||||
String url = this.host + "/api/version";
|
||||
String url = "/api/version";
|
||||
long startTime = System.currentTimeMillis();
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
HttpClient httpClient = HttpClient.newHttpClient();
|
||||
HttpRequest httpRequest =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -410,15 +436,12 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Pulls a model using the specified Ollama library model tag. The model is identified by a name
|
||||
* and a tag, which are combined into a single identifier in the format "name:tag" to pull the
|
||||
* corresponding model.
|
||||
* Pulls a model using the specified Ollama library model tag.
|
||||
* The model is identified by a name and a tag, which are combined into a single identifier
|
||||
* in the format "name:tag" to pull the corresponding model.
|
||||
*
|
||||
* @param modelName the name/tag of the model to be pulled. Ex: llama3:latest
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws URISyntaxException if the URI for the request is malformed
|
||||
*/
|
||||
public void pullModel(String modelName) throws OllamaBaseException {
|
||||
try {
|
||||
@ -427,7 +450,7 @@ public class OllamaAPI {
|
||||
return;
|
||||
}
|
||||
int numberOfRetries = 0;
|
||||
long baseDelayMillis = 3000L; // 1 second base delay
|
||||
long baseDelayMillis = 3000L; // 3 seconds base delay
|
||||
while (numberOfRetries < numberOfRetriesForModelPull) {
|
||||
try {
|
||||
this.doPullModel(modelName);
|
||||
@ -455,22 +478,19 @@ public class OllamaAPI {
|
||||
/**
|
||||
* Gets model details from the Ollama server.
|
||||
*
|
||||
* @param modelName the model
|
||||
* @param modelName the model name
|
||||
* @return the model details
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws URISyntaxException if the URI for the request is malformed
|
||||
*/
|
||||
public ModelDetail getModelDetails(String modelName) throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/show";
|
||||
String url = "/api/show";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
String jsonData = new ModelRequest(modelName).toString();
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -498,24 +518,21 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a custom model. Read more about custom model creation <a href=
|
||||
* "https://github.com/ollama/ollama/blob/main/docs/api.md#create-a-model">here</a>.
|
||||
* Creates a custom model. Read more about custom model creation
|
||||
* <a href="https://github.com/ollama/ollama/blob/main/docs/api.md#create-a-model">here</a>.
|
||||
*
|
||||
* @param customModelRequest custom model spec
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws URISyntaxException if the URI for the request is malformed
|
||||
*/
|
||||
public void createModel(CustomModelRequest customModelRequest) throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/create";
|
||||
String url = "/api/create";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
String jsonData = customModelRequest.toString();
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -562,26 +579,22 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a model from Ollama server.
|
||||
* Deletes a model from the Ollama server.
|
||||
*
|
||||
* @param modelName the name of the model to be deleted.
|
||||
* @param ignoreIfNotPresent ignore errors if the specified model is not present on Ollama
|
||||
* server.
|
||||
* @param modelName the name of the model to be deleted
|
||||
* @param ignoreIfNotPresent ignore errors if the specified model is not present on the Ollama server
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws URISyntaxException if the URI for the request is malformed
|
||||
*/
|
||||
public void deleteModel(String modelName, boolean ignoreIfNotPresent)
|
||||
throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/delete";
|
||||
String url = "/api/delete";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
String jsonData = new ModelRequest(modelName).toString();
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.method(
|
||||
"DELETE",
|
||||
HttpRequest.BodyPublishers.ofString(
|
||||
@ -615,12 +628,17 @@ public class OllamaAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
If an empty prompt is provided and the keep_alive parameter is set to 0, a model will be unloaded from memory.
|
||||
/**
|
||||
* Unloads a model from memory.
|
||||
* <p>
|
||||
* If an empty prompt is provided and the keep_alive parameter is set to 0, a model will be unloaded from memory.
|
||||
*
|
||||
* @param modelName the name of the model to unload
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
*/
|
||||
public void unloadModel(String modelName) throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/generate";
|
||||
String url = "/api/generate";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
@ -630,7 +648,7 @@ public class OllamaAPI {
|
||||
jsonMap.put("keep_alive", 0);
|
||||
String jsonData = objectMapper.writeValueAsString(jsonMap);
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.method(
|
||||
"POST",
|
||||
HttpRequest.BodyPublishers.ofString(
|
||||
@ -669,20 +687,18 @@ public class OllamaAPI {
|
||||
* @param modelRequest request for '/api/embed' endpoint
|
||||
* @return embeddings
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
*/
|
||||
public OllamaEmbedResponseModel embed(OllamaEmbedRequestModel modelRequest)
|
||||
throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/embed";
|
||||
String url = "/api/embed";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
String jsonData = Utils.getObjectMapper().writeValueAsString(modelRequest);
|
||||
HttpClient httpClient = HttpClient.newHttpClient();
|
||||
HttpRequest request =
|
||||
HttpRequest.newBuilder(new URI(url))
|
||||
HttpRequest.newBuilder(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -706,6 +722,18 @@ public class OllamaAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a response from a model using the specified parameters and stream observer.
|
||||
*
|
||||
* @param model the model name
|
||||
* @param prompt the prompt to send
|
||||
* @param raw whether to return the raw response
|
||||
* @param think whether to stream "thinking" tokens
|
||||
* @param options additional options
|
||||
* @param streamObserver the stream observer for handling streamed responses
|
||||
* @return the OllamaResult containing the response
|
||||
* @throws OllamaBaseException if the request fails
|
||||
*/
|
||||
public OllamaResult generate(
|
||||
String model,
|
||||
String prompt,
|
||||
@ -741,22 +769,20 @@ public class OllamaAPI {
|
||||
|
||||
/**
|
||||
* Generates structured output from the specified AI model and prompt.
|
||||
* <p>
|
||||
* Note: When formatting is specified, the 'think' parameter is not allowed.
|
||||
*
|
||||
* <p>Note: When formatting is specified, the 'think' parameter is not allowed.
|
||||
*
|
||||
* @param model The name or identifier of the AI model to use for generating the response.
|
||||
* @param model The name or identifier of the AI model to use for generating the response.
|
||||
* @param prompt The input text or prompt to provide to the AI model.
|
||||
* @param format A map containing the format specification for the structured output.
|
||||
* @return An instance of {@link OllamaResult} containing the structured response.
|
||||
* @throws OllamaBaseException if the response indicates an error status.
|
||||
* @throws IOException if an I/O error occurs during the HTTP request.
|
||||
* @throws InterruptedException if the operation is interrupted.
|
||||
*/
|
||||
@SuppressWarnings("LoggingSimilarMessage")
|
||||
public OllamaResult generateWithFormat(String model, String prompt, Map<String, Object> format)
|
||||
throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/generate";
|
||||
String url = "/api/generate";
|
||||
int statusCode = 0;
|
||||
Object out = null;
|
||||
try {
|
||||
@ -770,7 +796,7 @@ public class OllamaAPI {
|
||||
HttpClient httpClient = HttpClient.newHttpClient();
|
||||
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
getRequestBuilderDefault(new URI(this.host + url))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
@ -832,13 +858,14 @@ public class OllamaAPI {
|
||||
/**
|
||||
* Generates a response using the specified AI model and prompt, then automatically detects and
|
||||
* invokes any tool calls present in the model's output.
|
||||
*
|
||||
* <p>This method operates in blocking mode. It first augments the prompt with all registered
|
||||
* <p>
|
||||
* This method operates in blocking mode. It first augments the prompt with all registered
|
||||
* tool specifications (unless the prompt already begins with {@code [AVAILABLE_TOOLS]}), sends
|
||||
* the prompt to the model, and parses the model's response for tool call instructions. If tool
|
||||
* calls are found, each is invoked using the registered tool implementations, and their results
|
||||
* are collected. <b>Typical usage:</b>
|
||||
*
|
||||
* are collected.
|
||||
* <p>
|
||||
* Typical usage:
|
||||
* <pre>{@code
|
||||
* OllamaToolsResult result = ollamaAPI.generateWithTools(
|
||||
* "my-model",
|
||||
@ -850,17 +877,14 @@ public class OllamaAPI {
|
||||
* Map<ToolFunctionCallSpec, Object> toolResults = result.getToolResults();
|
||||
* }</pre>
|
||||
*
|
||||
* @param model the name or identifier of the AI model to use for generating the response
|
||||
* @param prompt the input text or prompt to provide to the AI model
|
||||
* @param options additional options or configurations to use when generating the response
|
||||
* @param model the name or identifier of the AI model to use for generating the response
|
||||
* @param prompt the input text or prompt to provide to the AI model
|
||||
* @param options additional options or configurations to use when generating the response
|
||||
* @param streamHandler handler for streaming responses; if {@code null}, streaming is disabled
|
||||
* @return an {@link OllamaToolsResult} containing the model's response and the results of any
|
||||
* invoked tools. If the model does not request any tool calls, the tool results map will be
|
||||
* empty.
|
||||
* @throws OllamaBaseException if the Ollama API returns an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws ToolInvocationException if a tool call fails to execute
|
||||
*/
|
||||
public OllamaToolsResult generateWithTools(
|
||||
String model, String prompt, Options options, OllamaGenerateTokenHandler streamHandler)
|
||||
@ -927,13 +951,12 @@ public class OllamaAPI {
|
||||
|
||||
/**
|
||||
* Asynchronously generates a response for a prompt using a model running on the Ollama server.
|
||||
*
|
||||
* <p>This method returns an {@link OllamaAsyncResultStreamer} handle that can be used to poll
|
||||
* <p>
|
||||
* This method returns an {@link OllamaAsyncResultStreamer} handle that can be used to poll
|
||||
* for status and retrieve streamed "thinking" and response tokens from the model. The call is
|
||||
* non-blocking.
|
||||
*
|
||||
* <p><b>Example usage:</b>
|
||||
*
|
||||
* <p>
|
||||
* Example usage:
|
||||
* <pre>{@code
|
||||
* OllamaAsyncResultStreamer resultStreamer = ollamaAPI.generate("gpt-oss:20b", "Who are you", false, true);
|
||||
* int pollIntervalMilliseconds = 1000;
|
||||
@ -950,24 +973,24 @@ public class OllamaAPI {
|
||||
* System.out.println("Complete response: " + resultStreamer.getCompleteResponse());
|
||||
* }</pre>
|
||||
*
|
||||
* @param model the Ollama model to use for generating the response
|
||||
* @param model the Ollama model to use for generating the response
|
||||
* @param prompt the prompt or question text to send to the model
|
||||
* @param raw if {@code true}, returns the raw response from the model
|
||||
* @param think if {@code true}, streams "thinking" tokens as well as response tokens
|
||||
* @return an {@link OllamaAsyncResultStreamer} handle for polling and retrieving streamed
|
||||
* results
|
||||
* @param raw if {@code true}, returns the raw response from the model
|
||||
* @param think if {@code true}, streams "thinking" tokens as well as response tokens
|
||||
* @return an {@link OllamaAsyncResultStreamer} handle for polling and retrieving streamed results
|
||||
* @throws OllamaBaseException if the request fails
|
||||
*/
|
||||
public OllamaAsyncResultStreamer generate(
|
||||
String model, String prompt, boolean raw, boolean think) throws OllamaBaseException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
String url = this.host + "/api/generate";
|
||||
String url = "/api/generate";
|
||||
try {
|
||||
OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt);
|
||||
ollamaRequestModel.setRaw(raw);
|
||||
ollamaRequestModel.setThink(think);
|
||||
OllamaAsyncResultStreamer ollamaAsyncResultStreamer =
|
||||
new OllamaAsyncResultStreamer(
|
||||
getRequestBuilderDefault(new URI(url)),
|
||||
getRequestBuilderDefault(new URI(this.host + url)),
|
||||
ollamaRequestModel,
|
||||
requestTimeoutSeconds);
|
||||
ollamaAsyncResultStreamer.start();
|
||||
@ -980,33 +1003,28 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a response from a model running on the Ollama server using one or more images as
|
||||
* input.
|
||||
*
|
||||
* <p>This method allows you to provide images (as {@link File}, {@code byte[]}, or image URL
|
||||
* Generates a response from a model running on the Ollama server using one or more images as input.
|
||||
* <p>
|
||||
* This method allows you to provide images (as {@link File}, {@code byte[]}, or image URL
|
||||
* {@link String}) along with a prompt to the specified model. The images are automatically
|
||||
* encoded as base64 before being sent. Additional model options can be specified via the {@link
|
||||
* Options} parameter.
|
||||
*
|
||||
* <p>If a {@code streamHandler} is provided, the response will be streamed and the handler will
|
||||
* <p>
|
||||
* If a {@code streamHandler} is provided, the response will be streamed and the handler will
|
||||
* be called for each streamed response chunk. If {@code streamHandler} is {@code null},
|
||||
* streaming is disabled and the full response is returned synchronously.
|
||||
*
|
||||
* @param model the name of the Ollama model to use for generating the response
|
||||
* @param prompt the prompt or question text to send to the model
|
||||
* @param images a list of images to use for the question; each element must be a {@link File},
|
||||
* {@code byte[]}, or a URL {@link String}
|
||||
* @param options the {@link Options} object containing model parameters; see <a
|
||||
* href="https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
|
||||
* model options documentation</a>
|
||||
* @param model the name of the Ollama model to use for generating the response
|
||||
* @param prompt the prompt or question text to send to the model
|
||||
* @param images a list of images to use for the question; each element must be a {@link File},
|
||||
* {@code byte[]}, or a URL {@link String}
|
||||
* @param options the {@link Options} object containing model parameters; see
|
||||
* <a href="https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama model options documentation</a>
|
||||
* @param format a map specifying the output format, or null for default
|
||||
* @param streamHandler an optional callback that is invoked for each streamed response chunk;
|
||||
* if {@code null}, disables streaming and returns the full response synchronously
|
||||
* if {@code null}, disables streaming and returns the full response synchronously
|
||||
* @return an {@link OllamaResult} containing the response text and time taken for the response
|
||||
* @throws OllamaBaseException if the response indicates an error status or an invalid image
|
||||
* type is provided
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
* @throws URISyntaxException if an image URL is malformed
|
||||
* @throws OllamaBaseException if the response indicates an error status or an invalid image type is provided
|
||||
*/
|
||||
public OllamaResult generateWithImages(
|
||||
String model,
|
||||
@ -1056,19 +1074,14 @@ public class OllamaAPI {
|
||||
/**
|
||||
* Asks a question to a model using an {@link OllamaChatRequest} and sets up a streaming response.
|
||||
* The request can be constructed using an {@link OllamaChatRequestBuilder}.
|
||||
* <p>
|
||||
* Hint: the OllamaChatRequestModel#getStream() property is not implemented.
|
||||
*
|
||||
* <p>Hint: the OllamaChatRequestModel#getStream() property is not implemented.
|
||||
*
|
||||
* @param request request object to be sent to the server
|
||||
* @param request request object to be sent to the server
|
||||
* @param tokenHandler callback handler to handle the last token from stream (caution: the
|
||||
* previous tokens from stream will not be concatenated)
|
||||
* previous tokens from stream will not be concatenated)
|
||||
* @return {@link OllamaChatResult}
|
||||
* @throws OllamaBaseException any response code than 200 has been returned
|
||||
* @throws IOException in case the responseStream can not be read
|
||||
* @throws InterruptedException in case the server is not reachable or network issues happen
|
||||
* @throws OllamaBaseException if the response indicates an error status
|
||||
* @throws IOException if an I/O error occurs during the HTTP request
|
||||
* @throws InterruptedException if the operation is interrupted
|
||||
*/
|
||||
public OllamaChatResult chat(OllamaChatRequest request, OllamaChatTokenHandler tokenHandler)
|
||||
throws OllamaBaseException {
|
||||
@ -1143,7 +1156,7 @@ public class OllamaAPI {
|
||||
* Registers a single tool in the tool registry using the provided tool specification.
|
||||
*
|
||||
* @param toolSpecification the specification of the tool to register. It contains the tool's
|
||||
* function name and other relevant information.
|
||||
* function name and other relevant information.
|
||||
*/
|
||||
public void registerTool(Tools.ToolSpecification toolSpecification) {
|
||||
toolRegistry.addTool(toolSpecification.getFunctionName(), toolSpecification);
|
||||
@ -1151,11 +1164,11 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers multiple tools in the tool registry using a list of tool specifications. Iterates
|
||||
* over the list and adds each tool specification to the registry.
|
||||
* Registers multiple tools in the tool registry using a list of tool specifications.
|
||||
* Iterates over the list and adds each tool specification to the registry.
|
||||
*
|
||||
* @param toolSpecifications a list of tool specifications to register. Each specification
|
||||
* contains information about a tool, such as its function name.
|
||||
* contains information about a tool, such as its function name.
|
||||
*/
|
||||
public void registerTools(List<Tools.ToolSpecification> toolSpecifications) {
|
||||
for (Tools.ToolSpecification toolSpecification : toolSpecifications) {
|
||||
@ -1177,9 +1190,8 @@ public class OllamaAPI {
|
||||
* providers. This method scans the caller's class for the {@link OllamaToolService} annotation
|
||||
* and recursively registers annotated tools from all the providers specified in the annotation.
|
||||
*
|
||||
* @throws IllegalStateException if the caller's class is not annotated with {@link
|
||||
* OllamaToolService}.
|
||||
* @throws RuntimeException if any reflection-based instantiation or invocation fails.
|
||||
* @throws OllamaBaseException if the caller's class is not annotated with {@link OllamaToolService}
|
||||
* or if reflection-based instantiation or invocation fails
|
||||
*/
|
||||
public void registerAnnotatedTools() throws OllamaBaseException {
|
||||
try {
|
||||
@ -1211,13 +1223,13 @@ public class OllamaAPI {
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers tools based on the annotations found on the methods of the provided object. This
|
||||
* method scans the methods of the given object and registers tools using the {@link ToolSpec}
|
||||
* Registers tools based on the annotations found on the methods of the provided object.
|
||||
* This method scans the methods of the given object and registers tools using the {@link ToolSpec}
|
||||
* annotation and associated {@link ToolProperty} annotations. It constructs tool specifications
|
||||
* and stores them in a tool registry.
|
||||
*
|
||||
* @param object the object whose methods are to be inspected for annotated tools.
|
||||
* @throws RuntimeException if any reflection-based instantiation or invocation fails.
|
||||
* @param object the object whose methods are to be inspected for annotated tools
|
||||
* @throws RuntimeException if any reflection-based instantiation or invocation fails
|
||||
*/
|
||||
public void registerAnnotatedTools(Object object) {
|
||||
Class<?> objectClass = object.getClass();
|
||||
@ -1325,9 +1337,9 @@ public class OllamaAPI {
|
||||
/**
|
||||
* Utility method to encode a file into a Base64 encoded string.
|
||||
*
|
||||
* @param file the file to be encoded into Base64.
|
||||
* @return a Base64 encoded string representing the contents of the file.
|
||||
* @throws IOException if an I/O error occurs during reading the file.
|
||||
* @param file the file to be encoded into Base64
|
||||
* @return a Base64 encoded string representing the contents of the file
|
||||
* @throws IOException if an I/O error occurs during reading the file
|
||||
*/
|
||||
private static String encodeFileToBase64(File file) throws IOException {
|
||||
return Base64.getEncoder().encodeToString(Files.readAllBytes(file.toPath()));
|
||||
@ -1336,26 +1348,25 @@ public class OllamaAPI {
|
||||
/**
|
||||
* Utility method to encode a byte array into a Base64 encoded string.
|
||||
*
|
||||
* @param bytes the byte array to be encoded into Base64.
|
||||
* @return a Base64 encoded string representing the byte array.
|
||||
* @param bytes the byte array to be encoded into Base64
|
||||
* @return a Base64 encoded string representing the byte array
|
||||
*/
|
||||
private static String encodeByteArrayToBase64(byte[] bytes) {
|
||||
return Base64.getEncoder().encodeToString(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a request for the Ollama API and returns the result. This method synchronously
|
||||
* calls the Ollama API. If a stream handler is provided, the request will be streamed;
|
||||
* otherwise, a regular synchronous request will be made.
|
||||
* Generates a request for the Ollama API and returns the result.
|
||||
* This method synchronously calls the Ollama API. If a stream handler is provided,
|
||||
* the request will be streamed; otherwise, a regular synchronous request will be made.
|
||||
*
|
||||
* @param ollamaRequestModel the request model containing necessary parameters for the Ollama
|
||||
* API request.
|
||||
* @param responseStreamHandler the stream handler to process streaming responses, or null for
|
||||
* non-streaming requests.
|
||||
* @return the result of the Ollama API request.
|
||||
* @throws OllamaBaseException if the request fails due to an issue with the Ollama API.
|
||||
* @throws IOException if an I/O error occurs during the request process.
|
||||
* @throws InterruptedException if the thread is interrupted during the request.
|
||||
* @param ollamaRequestModel the request model containing necessary parameters for the Ollama API request
|
||||
* @param thinkingStreamHandler the stream handler for "thinking" tokens, or null if not used
|
||||
* @param responseStreamHandler the stream handler to process streaming responses, or null for non-streaming requests
|
||||
* @return the result of the Ollama API request
|
||||
* @throws OllamaBaseException if the request fails due to an issue with the Ollama API
|
||||
* @throws IOException if an I/O error occurs during the request process
|
||||
* @throws InterruptedException if the thread is interrupted during the request
|
||||
*/
|
||||
private OllamaResult generateSyncForOllamaRequestModel(
|
||||
OllamaGenerateRequest ollamaRequestModel,
|
||||
@ -1404,6 +1415,13 @@ public class OllamaAPI {
|
||||
return auth != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Invokes a registered tool function by name and arguments.
|
||||
*
|
||||
* @param toolFunctionCallSpec the tool function call specification
|
||||
* @return the result of the tool function
|
||||
* @throws ToolInvocationException if the tool is not found or invocation fails
|
||||
*/
|
||||
private Object invokeTool(ToolFunctionCallSpec toolFunctionCallSpec)
|
||||
throws ToolInvocationException {
|
||||
try {
|
||||
|
@ -15,20 +15,20 @@ import java.util.Map;
|
||||
|
||||
public class MetricsRecorder {
|
||||
|
||||
// Corrected: Removed duplicate "format" label and ensured label count matches usage
|
||||
private static final Counter requests =
|
||||
Counter.build()
|
||||
.name("ollama_api_requests_total")
|
||||
.help("Total requests to Ollama API")
|
||||
.labelNames(
|
||||
"endpoint",
|
||||
"status",
|
||||
"model",
|
||||
"raw",
|
||||
"streaming",
|
||||
"format",
|
||||
"thinking",
|
||||
"http_status",
|
||||
"options")
|
||||
"options",
|
||||
"format")
|
||||
.register();
|
||||
|
||||
private static final Histogram requestLatency =
|
||||
@ -40,17 +40,17 @@ public class MetricsRecorder {
|
||||
"model",
|
||||
"raw",
|
||||
"streaming",
|
||||
"format",
|
||||
"thinking",
|
||||
"http_status",
|
||||
"options")
|
||||
"options",
|
||||
"format")
|
||||
.register();
|
||||
|
||||
private static final Histogram responseSize =
|
||||
Histogram.build()
|
||||
.name("ollama_api_response_size_bytes")
|
||||
.help("Response size in bytes")
|
||||
.labelNames("endpoint", "model", "options") // Added "options"
|
||||
.labelNames("endpoint", "model", "options")
|
||||
.register();
|
||||
|
||||
public static void record(
|
||||
@ -77,9 +77,9 @@ public class MetricsRecorder {
|
||||
formatString = format.toString();
|
||||
}
|
||||
|
||||
// Ensure the number of labels matches the labelNames above (8 labels)
|
||||
requests.labels(
|
||||
endpoint,
|
||||
"success",
|
||||
safe(model),
|
||||
String.valueOf(raw),
|
||||
String.valueOf(streaming),
|
||||
@ -89,6 +89,8 @@ public class MetricsRecorder {
|
||||
safe(formatString))
|
||||
.inc();
|
||||
double durationSeconds = (endTime - startTime) / 1000.0;
|
||||
|
||||
// Ensure the number of labels matches the labelNames above (8 labels)
|
||||
requestLatency
|
||||
.labels(
|
||||
endpoint,
|
||||
|
Loading…
x
Reference in New Issue
Block a user