Refactor OllamaAPI and related classes for improved embedding model support and tool registration

This update modifies the OllamaAPI class to enhance support for embedding models by renaming related classes and introducing updated request and response models. The OllamaEmbedRequestModel and OllamaEmbedResponseModel classes have been moved from the io.github.ollama4j.models.embeddings package to io.github.ollama4j.models.embed, along with their corresponding builder class. Additionally, the tool registration process has been improved with the introduction of annotations for automatic tool discovery. Deprecated methods and commented-out code have been removed for clarity, and Javadoc comments have been updated for consistency across the API.
This commit is contained in:
amithkoujalgi 2025-09-26 22:13:55 +05:30
parent f5ca5bdca3
commit 305bab9819
No known key found for this signature in database
GPG Key ID: E29A37746AF94B70
22 changed files with 202 additions and 391 deletions

View File

@ -15,8 +15,8 @@ import io.github.ollama4j.exceptions.ToolInvocationException;
import io.github.ollama4j.metrics.MetricsRecorder; import io.github.ollama4j.metrics.MetricsRecorder;
import io.github.ollama4j.models.chat.*; import io.github.ollama4j.models.chat.*;
import io.github.ollama4j.models.chat.OllamaChatTokenHandler; import io.github.ollama4j.models.chat.OllamaChatTokenHandler;
import io.github.ollama4j.models.embeddings.OllamaEmbedRequestModel; import io.github.ollama4j.models.embed.OllamaEmbedRequestModel;
import io.github.ollama4j.models.embeddings.OllamaEmbedResponseModel; import io.github.ollama4j.models.embed.OllamaEmbedResponseModel;
import io.github.ollama4j.models.generate.OllamaGenerateRequest; import io.github.ollama4j.models.generate.OllamaGenerateRequest;
import io.github.ollama4j.models.generate.OllamaGenerateStreamObserver; import io.github.ollama4j.models.generate.OllamaGenerateStreamObserver;
import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler; import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler;
@ -24,9 +24,15 @@ import io.github.ollama4j.models.ps.ModelsProcessResponse;
import io.github.ollama4j.models.request.*; import io.github.ollama4j.models.request.*;
import io.github.ollama4j.models.response.*; import io.github.ollama4j.models.response.*;
import io.github.ollama4j.tools.*; import io.github.ollama4j.tools.*;
import io.github.ollama4j.tools.annotations.OllamaToolService;
import io.github.ollama4j.tools.annotations.ToolProperty;
import io.github.ollama4j.tools.annotations.ToolSpec;
import io.github.ollama4j.utils.Constants; import io.github.ollama4j.utils.Constants;
import io.github.ollama4j.utils.Utils; import io.github.ollama4j.utils.Utils;
import java.io.*; import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Parameter;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.http.HttpClient; import java.net.http.HttpClient;
@ -42,10 +48,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/** /**
* The base Ollama API class for interacting with the Ollama server. * The main API class for interacting with the Ollama server.
* *
* <p>This class provides methods for model management, chat, embeddings, tool registration, and * <p>This class provides methods for model management, chat, embeddings, tool registration, and more.
* more.
*/ */
@SuppressWarnings({"DuplicatedCode", "resource", "SpellCheckingInspection"}) @SuppressWarnings({"DuplicatedCode", "resource", "SpellCheckingInspection"})
public class OllamaAPI { public class OllamaAPI {
@ -59,8 +64,8 @@ public class OllamaAPI {
/** /**
* The request timeout in seconds for API calls. * The request timeout in seconds for API calls.
* * <p>
* <p>Default is 10 seconds. This value determines how long the client will wait for a response * Default is 10 seconds. This value determines how long the client will wait for a response
* from the Ollama server before timing out. * from the Ollama server before timing out.
*/ */
@Setter private long requestTimeoutSeconds = 10; @Setter private long requestTimeoutSeconds = 10;
@ -73,19 +78,19 @@ public class OllamaAPI {
/** /**
* The maximum number of retries for tool calls during chat interactions. * The maximum number of retries for tool calls during chat interactions.
* * <p>
* <p>This value controls how many times the API will attempt to call a tool in the event of a * This value controls how many times the API will attempt to call a tool in the event of a
* failure. Default is 3. * failure. Default is 3.
*/ */
@Setter private int maxChatToolCallRetries = 3; @Setter private int maxChatToolCallRetries = 3;
/** /**
* The number of retries to attempt when pulling a model from the Ollama server. * The number of retries to attempt when pulling a model from the Ollama server.
* * <p>
* <p>If set to 0, no retries will be performed. If greater than 0, the API will retry pulling * If set to 0, no retries will be performed. If greater than 0, the API will retry pulling
* the model up to the specified number of times in case of failure. * the model up to the specified number of times in case of failure.
* * <p>
* <p>Default is 0 (no retries). * Default is 0 (no retries).
*/ */
@Setter @Setter
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"}) @SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
@ -93,13 +98,15 @@ public class OllamaAPI {
/** /**
* Enable or disable Prometheus metrics collection. * Enable or disable Prometheus metrics collection.
* * <p>
* <p>When enabled, the API will collect and expose metrics for request counts, durations, model * When enabled, the API will collect and expose metrics for request counts, durations, model
* usage, and other operational statistics. Default is false. * usage, and other operational statistics. Default is false.
*/ */
@Setter private boolean metricsEnabled = false; @Setter private boolean metricsEnabled = false;
/** Instantiates the Ollama API with the default Ollama host: {@code http://localhost:11434} */ /**
* Instantiates the Ollama API with the default Ollama host: {@code http://localhost:11434}
*/
public OllamaAPI() { public OllamaAPI() {
this.host = "http://localhost:11434"; this.host = "http://localhost:11434";
// initializeMetrics(); // initializeMetrics();
@ -121,8 +128,7 @@ public class OllamaAPI {
} }
/** /**
* Set basic authentication for accessing an Ollama server that's behind a * Set basic authentication for accessing an Ollama server that's behind a reverse-proxy/gateway.
* reverse-proxy/gateway.
* *
* @param username the username * @param username the username
* @param password the password * @param password the password
@ -132,8 +138,7 @@ public class OllamaAPI {
} }
/** /**
* Set Bearer authentication for accessing an Ollama server that's behind a * Set Bearer authentication for accessing an Ollama server that's behind a reverse-proxy/gateway.
* reverse-proxy/gateway.
* *
* @param bearerToken the Bearer authentication token to provide * @param bearerToken the Bearer authentication token to provide
*/ */
@ -357,8 +362,8 @@ public class OllamaAPI {
} }
/** /**
* Processes a single ModelPullResponse, handling errors and logging status. Returns true if the * Processes a single ModelPullResponse, handling errors and logging status.
* response indicates a successful pull. * Returns true if the response indicates a successful pull.
* *
* @param modelPullResponse the response from the model pull * @param modelPullResponse the response from the model pull
* @param modelName the name of the model * @param modelName the name of the model
@ -429,9 +434,9 @@ public class OllamaAPI {
} }
/** /**
* Pulls a model using the specified Ollama library model tag. The model is identified by a name * Pulls a model using the specified Ollama library model tag.
* and a tag, which are combined into a single identifier in the format "name:tag" to pull the * The model is identified by a name and a tag, which are combined into a single identifier
* corresponding model. * in the format "name:tag" to pull the corresponding model.
* *
* @param modelName the name/tag of the model to be pulled. Ex: llama3:latest * @param modelName the name/tag of the model to be pulled. Ex: llama3:latest
* @throws OllamaBaseException if the response indicates an error status * @throws OllamaBaseException if the response indicates an error status
@ -511,8 +516,8 @@ public class OllamaAPI {
} }
/** /**
* Creates a custom model. Read more about custom model creation <a * Creates a custom model. Read more about custom model creation
* href="https://github.com/ollama/ollama/blob/main/docs/api.md#create-a-model">here</a>. * <a href="https://github.com/ollama/ollama/blob/main/docs/api.md#create-a-model">here</a>.
* *
* @param customModelRequest custom model spec * @param customModelRequest custom model spec
* @throws OllamaBaseException if the response indicates an error status * @throws OllamaBaseException if the response indicates an error status
@ -575,8 +580,7 @@ public class OllamaAPI {
* Deletes a model from the Ollama server. * Deletes a model from the Ollama server.
* *
* @param modelName the name of the model to be deleted * @param modelName the name of the model to be deleted
* @param ignoreIfNotPresent ignore errors if the specified model is not present on the Ollama * @param ignoreIfNotPresent ignore errors if the specified model is not present on the Ollama server
* server
* @throws OllamaBaseException if the response indicates an error status * @throws OllamaBaseException if the response indicates an error status
*/ */
public void deleteModel(String modelName, boolean ignoreIfNotPresent) public void deleteModel(String modelName, boolean ignoreIfNotPresent)
@ -624,8 +628,8 @@ public class OllamaAPI {
/** /**
* Unloads a model from memory. * Unloads a model from memory.
* * <p>
* <p>If an empty prompt is provided and the keep_alive parameter is set to 0, a model will be * If an empty prompt is provided and the keep_alive parameter is set to 0, a model will be
* unloaded from memory. * unloaded from memory.
* *
* @param modelName the name of the model to unload * @param modelName the name of the model to unload
@ -722,9 +726,13 @@ public class OllamaAPI {
} }
/** /**
* Generates a response from a model using the specified parameters and stream observer. If * Generates a response from a model using the specified parameters and stream observer.
* {@code streamObserver} is provided, streaming is enabled; otherwise, a synchronous call is * If {@code streamObserver} is provided, streaming is enabled; otherwise, a synchronous call is made.
* made. *
* @param request the generation request
* @param streamObserver the stream observer for streaming responses, or null for synchronous
* @return the result of the generation
* @throws OllamaBaseException if the request fails
*/ */
public OllamaResult generate( public OllamaResult generate(
OllamaGenerateRequest request, OllamaGenerateStreamObserver streamObserver) OllamaGenerateRequest request, OllamaGenerateStreamObserver streamObserver)
@ -751,13 +759,10 @@ public class OllamaAPI {
} }
} }
// (No javadoc for private helper, as is standard)
private OllamaResult generateWithToolsInternal( private OllamaResult generateWithToolsInternal(
OllamaGenerateRequest request, OllamaGenerateStreamObserver streamObserver) OllamaGenerateRequest request, OllamaGenerateStreamObserver streamObserver)
throws OllamaBaseException { throws OllamaBaseException {
// List<Tools.PromptFuncDefinition> tools = new ArrayList<>();
// for (Tools.ToolSpecification spec : toolRegistry.getRegisteredSpecs()) {
// tools.add(spec.getToolPrompt());
// }
ArrayList<OllamaChatMessage> msgs = new ArrayList<>(); ArrayList<OllamaChatMessage> msgs = new ArrayList<>();
OllamaChatRequest chatRequest = new OllamaChatRequest(); OllamaChatRequest chatRequest = new OllamaChatRequest();
chatRequest.setModel(request.getModel()); chatRequest.setModel(request.getModel());
@ -786,6 +791,16 @@ public class OllamaAPI {
-1); -1);
} }
/**
* Generates a response from a model asynchronously, returning a streamer for results.
*
* @param model the model name
* @param prompt the prompt to send
* @param raw whether to use raw mode
* @param think whether to use "think" mode
* @return an OllamaAsyncResultStreamer for streaming results
* @throws OllamaBaseException if the request fails
*/
public OllamaAsyncResultStreamer generateAsync( public OllamaAsyncResultStreamer generateAsync(
String model, String prompt, boolean raw, boolean think) throws OllamaBaseException { String model, String prompt, boolean raw, boolean think) throws OllamaBaseException {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
@ -812,10 +827,10 @@ public class OllamaAPI {
} }
/** /**
* Ask a question to a model using an {@link OllamaChatRequest} and set up streaming response. * Sends a chat request to a model using an {@link OllamaChatRequest} and sets up streaming response.
* This can be constructed using an {@link OllamaChatRequestBuilder}. * This can be constructed using an {@link OllamaChatRequestBuilder}.
* *
* <p>Hint: the OllamaChatRequestModel#getStream() property is not implemented. * <p>Note: the OllamaChatRequestModel#getStream() property is not implemented.
* *
* @param request request object to be sent to the server * @param request request object to be sent to the server
* @param tokenHandler callback handler to handle the last token from stream (caution: the * @param tokenHandler callback handler to handle the last token from stream (caution: the
@ -911,8 +926,8 @@ public class OllamaAPI {
/** /**
* Registers multiple tools in the tool registry. * Registers multiple tools in the tool registry.
* *
* @param tools a list of {@link Tools.Tool} objects to register. Each tool contains * @param tools a list of {@link Tools.Tool} objects to register. Each tool contains its
* its specification and function. * specification and function.
*/ */
public void registerTools(List<Tools.Tool> tools) { public void registerTools(List<Tools.Tool> tools) {
toolRegistry.addTools(tools); toolRegistry.addTools(tools);
@ -927,135 +942,101 @@ public class OllamaAPI {
LOG.debug("All tools have been deregistered."); LOG.debug("All tools have been deregistered.");
} }
// /**
// /** * Registers tools based on the annotations found on the methods of the caller's class and its
// * Registers tools based on the annotations found on the methods of the caller's class and * providers. This method scans the caller's class for the {@link OllamaToolService} annotation
// its * and recursively registers annotated tools from all the providers specified in the annotation.
// * providers. This method scans the caller's class for the {@link OllamaToolService} *
// annotation * @throws OllamaBaseException if the caller's class is not annotated with {@link
// * and recursively registers annotated tools from all the providers specified in the * OllamaToolService} or if reflection-based instantiation or invocation fails
// annotation. */
// * public void registerAnnotatedTools() throws OllamaBaseException {
// * @throws OllamaBaseException if the caller's class is not annotated with {@link try {
// * OllamaToolService} or if reflection-based instantiation or invocation fails Class<?> callerClass = null;
// */ try {
// public void registerAnnotatedTools() throws OllamaBaseException { callerClass =
// try { Class.forName(Thread.currentThread().getStackTrace()[2].getClassName());
// Class<?> callerClass = null; } catch (ClassNotFoundException e) {
// try { throw new OllamaBaseException(e.getMessage(), e);
// callerClass = }
//
// Class.forName(Thread.currentThread().getStackTrace()[2].getClassName()); OllamaToolService ollamaToolServiceAnnotation =
// } catch (ClassNotFoundException e) { callerClass.getDeclaredAnnotation(OllamaToolService.class);
// throw new OllamaBaseException(e.getMessage(), e); if (ollamaToolServiceAnnotation == null) {
// } throw new IllegalStateException(
// callerClass + " is not annotated as " + OllamaToolService.class);
// OllamaToolService ollamaToolServiceAnnotation = }
// callerClass.getDeclaredAnnotation(OllamaToolService.class);
// if (ollamaToolServiceAnnotation == null) { Class<?>[] providers = ollamaToolServiceAnnotation.providers();
// throw new IllegalStateException( for (Class<?> provider : providers) {
// callerClass + " is not annotated as " + OllamaToolService.class); registerAnnotatedTools(provider.getDeclaredConstructor().newInstance());
// } }
// } catch (InstantiationException
// Class<?>[] providers = ollamaToolServiceAnnotation.providers(); | NoSuchMethodException
// for (Class<?> provider : providers) { | IllegalAccessException
// registerAnnotatedTools(provider.getDeclaredConstructor().newInstance()); | InvocationTargetException e) {
// } throw new OllamaBaseException(e.getMessage());
// } catch (InstantiationException }
// | NoSuchMethodException }
// | IllegalAccessException
// | InvocationTargetException e) { /**
// throw new OllamaBaseException(e.getMessage()); * Registers tools based on the annotations found on the methods of the provided object.
// } * This method scans the methods of the given object and registers tools using the {@link ToolSpec}
// } * annotation and associated {@link ToolProperty} annotations. It constructs tool specifications
// * and stores them in a tool registry.
// /** *
// * Registers tools based on the annotations found on the methods of the provided object. * @param object the object whose methods are to be inspected for annotated tools
// This * @throws RuntimeException if any reflection-based instantiation or invocation fails
// * method scans the methods of the given object and registers tools using the {@link */
// ToolSpec} public void registerAnnotatedTools(Object object) {
// * annotation and associated {@link ToolProperty} annotations. It constructs tool Class<?> objectClass = object.getClass();
// specifications Method[] methods = objectClass.getMethods();
// * and stores them in a tool registry. for (Method m : methods) {
// * ToolSpec toolSpec = m.getDeclaredAnnotation(ToolSpec.class);
// * @param object the object whose methods are to be inspected for annotated tools if (toolSpec == null) {
// * @throws RuntimeException if any reflection-based instantiation or invocation fails continue;
// */ }
// public void registerAnnotatedTools(Object object) { String operationName = !toolSpec.name().isBlank() ? toolSpec.name() : m.getName();
// Class<?> objectClass = object.getClass(); String operationDesc = !toolSpec.desc().isBlank() ? toolSpec.desc() : operationName;
// Method[] methods = objectClass.getMethods();
// for (Method m : methods) { final Map<String, Tools.Property> params = new HashMap<String, Tools.Property>() {};
// ToolSpec toolSpec = m.getDeclaredAnnotation(ToolSpec.class); LinkedHashMap<String, String> methodParams = new LinkedHashMap<>();
// if (toolSpec == null) { for (Parameter parameter : m.getParameters()) {
// continue; final ToolProperty toolPropertyAnn =
// } parameter.getDeclaredAnnotation(ToolProperty.class);
// String operationName = !toolSpec.name().isBlank() ? toolSpec.name() : m.getName(); String propType = parameter.getType().getTypeName();
// String operationDesc = !toolSpec.desc().isBlank() ? toolSpec.desc() : if (toolPropertyAnn == null) {
// operationName; methodParams.put(parameter.getName(), null);
// continue;
// final Tools.PropsBuilder propsBuilder = new Tools.PropsBuilder(); }
// LinkedHashMap<String, String> methodParams = new LinkedHashMap<>(); String propName =
// for (Parameter parameter : m.getParameters()) { !toolPropertyAnn.name().isBlank()
// final ToolProperty toolPropertyAnn = ? toolPropertyAnn.name()
// parameter.getDeclaredAnnotation(ToolProperty.class); : parameter.getName();
// String propType = parameter.getType().getTypeName(); methodParams.put(propName, propType);
// if (toolPropertyAnn == null) { params.put(
// methodParams.put(parameter.getName(), null); propName,
// continue; Tools.Property.builder()
// } .type(propType)
// String propName = .description(toolPropertyAnn.desc())
// !toolPropertyAnn.name().isBlank() .required(toolPropertyAnn.required())
// ? toolPropertyAnn.name() .build());
// : parameter.getName(); }
// methodParams.put(propName, propType); Tools.ToolSpec toolSpecification =
// propsBuilder.withProperty( Tools.ToolSpec.builder()
// propName, .name(operationName)
// Tools.PromptFuncDefinition.Property.builder() .description(operationDesc)
// .type(propType) .parameters(Tools.Parameters.of(params))
// .description(toolPropertyAnn.desc()) .build();
// .required(toolPropertyAnn.required()) ReflectionalToolFunction reflectionalToolFunction =
// .build()); new ReflectionalToolFunction(object, m, methodParams);
// } toolRegistry.addTool(
// final Map<String, Tools.PromptFuncDefinition.Property> params = Tools.Tool.builder()
// propsBuilder.build(); .toolFunction(reflectionalToolFunction)
// List<String> reqProps = .toolSpec(toolSpecification)
// params.entrySet().stream() .build());
// .filter(e -> e.getValue().isRequired()) }
// .map(Map.Entry::getKey) }
// .collect(Collectors.toList());
//
// Tools.ToolSpecification toolSpecification =
// Tools.ToolSpecification.builder()
// .functionName(operationName)
// .functionDescription(operationDesc)
// .toolPrompt(
// Tools.PromptFuncDefinition.builder()
// .type("function")
// .function(
// Tools.PromptFuncDefinition.PromptFuncSpec
// .builder()
// .name(operationName)
// .description(operationDesc)
// .parameters(
// Tools.PromptFuncDefinition
//
// .Parameters.builder()
// .type("object")
//
// .properties(params)
//
// .required(reqProps)
// .build())
// .build())
// .build())
// .build();
//
// ReflectionalToolFunction reflectionalToolFunction =
// new ReflectionalToolFunction(object, m, methodParams);
// toolSpecification.setToolFunction(reflectionalToolFunction);
// toolRegistry.addTool(toolSpecification.getFunctionName(), toolSpecification);
// }
// }
/** /**
* Adds a custom role. * Adds a custom role.
@ -1111,19 +1092,15 @@ public class OllamaAPI {
} }
/** /**
* Generates a request for the Ollama API and returns the result. This method synchronously * Generates a request for the Ollama API and returns the result.
* calls the Ollama API. If a stream handler is provided, the request will be streamed; * This method synchronously calls the Ollama API. If a stream handler is provided,
* otherwise, a regular synchronous request will be made. * the request will be streamed; otherwise, a regular synchronous request will be made.
* *
* @param ollamaRequestModel the request model containing necessary parameters for the Ollama * @param ollamaRequestModel the request model containing necessary parameters for the Ollama API request
* API request
* @param thinkingStreamHandler the stream handler for "thinking" tokens, or null if not used * @param thinkingStreamHandler the stream handler for "thinking" tokens, or null if not used
* @param responseStreamHandler the stream handler to process streaming responses, or null for * @param responseStreamHandler the stream handler to process streaming responses, or null for non-streaming requests
* non-streaming requests
* @return the result of the Ollama API request * @return the result of the Ollama API request
* @throws OllamaBaseException if the request fails due to an issue with the Ollama API * @throws OllamaBaseException if the request fails due to an issue with the Ollama API
* @throws IOException if an I/O error occurs during the request process
* @throws InterruptedException if the thread is interrupted during the request
*/ */
private OllamaResult generateSyncForOllamaRequestModel( private OllamaResult generateSyncForOllamaRequestModel(
OllamaGenerateRequest ollamaRequestModel, OllamaGenerateRequest ollamaRequestModel,
@ -1192,157 +1169,4 @@ public class OllamaAPI {
private boolean isAuthSet() { private boolean isAuthSet() {
return auth != null; return auth != null;
} }
// /**
// * Invokes a registered tool function by name and arguments.
// *
// * @param toolFunctionCallSpec the tool function call specification
// * @return the result of the tool function
// * @throws ToolInvocationException if the tool is not found or invocation fails
// */
// private Object invokeTool(ToolFunctionCallSpec toolFunctionCallSpec)
// throws ToolInvocationException {
// try {
// String methodName = toolFunctionCallSpec.getName();
// Map<String, Object> arguments = toolFunctionCallSpec.getArguments();
// ToolFunction function = toolRegistry.getToolFunction(methodName);
// LOG.debug("Invoking function {} with arguments {}", methodName, arguments);
// if (function == null) {
// throw new ToolNotFoundException(
// "No such tool: "
// + methodName
// + ". Please register the tool before invoking it.");
// }
// return function.apply(arguments);
// } catch (Exception e) {
// throw new ToolInvocationException(
// "Failed to invoke tool: " + toolFunctionCallSpec.getName(), e);
// }
// }
// /**
// * Initialize metrics collection if enabled.
// */
// private void initializeMetrics() {
// if (metricsEnabled) {
// OllamaMetricsService.initialize();
// LOG.info("Prometheus metrics collection enabled for Ollama4j client");
// }
// }
//
// /**
// * Record metrics for an API request.
// *
// * @param endpoint the API endpoint
// * @param method the HTTP method
// * @param durationSeconds the request duration
// * @param success whether the request was successful
// * @param errorType the error type if the request failed
// */
// private void recordMetrics(
// String endpoint,
// String method,
// double durationSeconds,
// boolean success,
// String errorType) {
// if (!metricsEnabled) {
// return;
// }
//
// if (success) {
// OllamaMetricsService.recordRequest(endpoint, method, durationSeconds);
// } else {
// OllamaMetricsService.recordRequestError(endpoint, method, durationSeconds,
// errorType);
// }
// }
// /**
// * Record metrics for model usage.
// *
// * @param modelName the model name
// * @param operation the operation performed
// * @param durationSeconds the operation duration
// */
// private void recordModelMetrics(String modelName, String operation, double
// durationSeconds) {
// if (!metricsEnabled) {
// return;
// }
//
// OllamaMetricsService.recordModelUsage(modelName, operation, durationSeconds);
// }
// /**
// * Record token generation metrics.
// *
// * @param modelName the model name
// * @param tokenCount the number of tokens generated
// */
// private void recordTokenMetrics(String modelName, int tokenCount) {
// if (!metricsEnabled) {
// return;
// }
//
// OllamaMetricsService.recordTokensGenerated(modelName, tokenCount);
// }
// /**
// * Execute a method with metrics collection.
// *
// * @param endpoint the API endpoint
// * @param method the HTTP method
// * @param operation the operation name for model metrics
// * @param modelName the model name (can be null)
// * @param runnable the operation to execute
// * @return the result of the operation
// * @throws Exception if the operation fails
// */
// private <T> T executeWithMetrics(
// String endpoint,
// String method,
// String operation,
// String modelName,
// MetricsOperation<T> runnable)
// throws Exception {
// long startTime = System.nanoTime();
// boolean success = false;
// String errorType = null;
//
// try {
// OllamaMetricsService.incrementActiveConnections();
// T result = runnable.execute();
// success = true;
// return result;
// } catch (OllamaBaseException e) {
// errorType = "ollama_error";
// throw e;
// } catch (IOException e) {
// errorType = "io_error";
// throw e;
// } catch (InterruptedException e) {
// errorType = "interrupted";
// throw e;
// } catch (Exception e) {
// errorType = "unknown_error";
// throw e;
// } finally {
// OllamaMetricsService.decrementActiveConnections();
// double durationSeconds = (System.nanoTime() - startTime) / 1_000_000_000.0;
//
// recordMetrics(endpoint, method, durationSeconds, success, errorType);
//
// if (modelName != null) {
// recordModelMetrics(modelName, operation, durationSeconds);
// }
// }
// }
// /**
// * Functional interface for operations that need metrics collection.
// */
// @FunctionalInterface
// private interface MetricsOperation<T> {
// T execute() throws Exception;
// }
} }

View File

@ -25,6 +25,7 @@ import lombok.*;
* href="https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate * href="https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate
* chat completion</a> * chat completion</a>
*/ */
@SuppressWarnings("NullableProblems")
@Data @Data
@AllArgsConstructor @AllArgsConstructor
@RequiredArgsConstructor @RequiredArgsConstructor

View File

@ -34,8 +34,6 @@ public class OllamaChatMessageRole {
} }
public static OllamaChatMessageRole newCustomRole(String roleName) { public static OllamaChatMessageRole newCustomRole(String roleName) {
// OllamaChatMessageRole customRole = new OllamaChatMessageRole(roleName);
// roles.add(customRole);
return new OllamaChatMessageRole(roleName); return new OllamaChatMessageRole(roleName);
} }

View File

@ -36,13 +36,6 @@ public class OllamaChatRequestBuilder {
request.setMessages(new ArrayList<>()); request.setMessages(new ArrayList<>());
} }
// private OllamaChatRequestBuilder(String model, List<OllamaChatMessage> messages) {
// request = new OllamaChatRequest(model, false, messages);
// }
// public static OllamaChatRequestBuilder builder(String model) {
// return new OllamaChatRequestBuilder(model, new ArrayList<>());
// }
public static OllamaChatRequestBuilder builder() { public static OllamaChatRequestBuilder builder() {
return new OllamaChatRequestBuilder(); return new OllamaChatRequestBuilder();
} }

View File

@ -44,19 +44,4 @@ public class OllamaChatResult {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
@Deprecated
public String getResponse() {
return responseModel != null ? responseModel.getMessage().getResponse() : "";
}
@Deprecated
public int getHttpStatusCode() {
return 200;
}
@Deprecated
public long getResponseTime() {
return responseModel != null ? responseModel.getTotalDuration() : 0L;
}
} }

View File

@ -33,12 +33,8 @@ public class OllamaChatStreamObserver implements OllamaChatTokenHandler {
boolean hasResponse = response != null && !response.isEmpty(); boolean hasResponse = response != null && !response.isEmpty();
if (!hasResponse && hasThinking && thinkingStreamHandler != null) { if (!hasResponse && hasThinking && thinkingStreamHandler != null) {
// use only new tokens received, instead of appending the tokens to the previous
// ones and sending the full string again
thinkingStreamHandler.accept(thinking); thinkingStreamHandler.accept(thinking);
} else if (hasResponse) { } else if (hasResponse) {
// use only new tokens received, instead of appending the tokens to the previous
// ones and sending the full string again
responseStreamHandler.accept(response); responseStreamHandler.accept(response);
} }
} }

View File

@ -6,7 +6,7 @@
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* *
*/ */
package io.github.ollama4j.models.embeddings; package io.github.ollama4j.models.embed;
import io.github.ollama4j.utils.Options; import io.github.ollama4j.utils.Options;
import java.util.List; import java.util.List;

View File

@ -6,7 +6,7 @@
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* *
*/ */
package io.github.ollama4j.models.embeddings; package io.github.ollama4j.models.embed;
import static io.github.ollama4j.utils.Utils.getObjectMapper; import static io.github.ollama4j.utils.Utils.getObjectMapper;
@ -16,6 +16,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import lombok.*; import lombok.*;
@SuppressWarnings("NullableProblems")
@Data @Data
@RequiredArgsConstructor @RequiredArgsConstructor
@NoArgsConstructor @NoArgsConstructor

View File

@ -6,7 +6,7 @@
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* *
*/ */
package io.github.ollama4j.models.embeddings; package io.github.ollama4j.models.embed;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.List; import java.util.List;

View File

@ -39,12 +39,8 @@ public class OllamaGenerateStreamObserver {
boolean hasThinking = thinking != null && !thinking.isEmpty(); boolean hasThinking = thinking != null && !thinking.isEmpty();
if (!hasResponse && hasThinking && thinkingStreamHandler != null) { if (!hasResponse && hasThinking && thinkingStreamHandler != null) {
// use only new tokens received, instead of appending the tokens to the previous
// ones and sending the full string again
thinkingStreamHandler.accept(thinking); thinkingStreamHandler.accept(thinking);
} else if (hasResponse && responseStreamHandler != null) { } else if (hasResponse && responseStreamHandler != null) {
// use only new tokens received, instead of appending the tokens to the previous
// ones and sending the full string again
responseStreamHandler.accept(response); responseStreamHandler.accept(response);
} }
} }

View File

@ -41,7 +41,7 @@ public class ModelsProcessResponse {
private ModelDetails details; private ModelDetails details;
@JsonProperty("expires_at") @JsonProperty("expires_at")
private String expiresAt; // Consider using LocalDateTime if you need to process date/time private String expiresAt;
@JsonProperty("size_vram") @JsonProperty("size_vram")
private long sizeVram; private long sizeVram;

View File

@ -26,7 +26,7 @@ public class CustomModelRequest {
private Map<String, String> files; private Map<String, String> files;
private Map<String, String> adapters; private Map<String, String> adapters;
private String template; private String template;
private Object license; // Using Object to handle both String and List<String> private Object license;
private String system; private String system;
private Map<String, Object> parameters; private Map<String, Object> parameters;
private List<Object> messages; private List<Object> messages;

View File

@ -59,10 +59,10 @@ public class OllamaChatEndpointCaller extends OllamaEndpointCaller {
try { try {
OllamaChatResponseModel ollamaResponseModel = OllamaChatResponseModel ollamaResponseModel =
Utils.getObjectMapper().readValue(line, OllamaChatResponseModel.class); Utils.getObjectMapper().readValue(line, OllamaChatResponseModel.class);
// it seems that under heavy load ollama responds with an empty chat message part in the // It seems that under heavy load Ollama responds with an empty chat message part in the
// streamed response // streamed response.
// thus, we null check the message and hope that the next streamed response has some // Thus, we null check the message and hope that the next streamed response has some
// message content again // message content again.
OllamaChatMessage message = ollamaResponseModel.getMessage(); OllamaChatMessage message = ollamaResponseModel.getMessage();
if (message != null) { if (message != null) {
if (message.getThinking() != null) { if (message.getThinking() != null) {

View File

@ -24,7 +24,7 @@ public abstract class OllamaEndpointCaller {
private final Auth auth; private final Auth auth;
private final long requestTimeoutSeconds; private final long requestTimeoutSeconds;
public OllamaEndpointCaller(String host, Auth auth, long requestTimeoutSeconds) { protected OllamaEndpointCaller(String host, Auth auth, long requestTimeoutSeconds) {
this.host = host; this.host = host;
this.auth = auth; this.auth = auth;
this.requestTimeoutSeconds = requestTimeoutSeconds; this.requestTimeoutSeconds = requestTimeoutSeconds;

View File

@ -86,7 +86,6 @@ public class OllamaGenerateEndpointCaller extends OllamaEndpointCaller {
@SuppressWarnings("DuplicatedCode") @SuppressWarnings("DuplicatedCode")
public OllamaResult callSync(OllamaRequestBody body) public OllamaResult callSync(OllamaRequestBody body)
throws OllamaBaseException, IOException, InterruptedException { throws OllamaBaseException, IOException, InterruptedException {
// Create Request
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
HttpClient httpClient = HttpClient.newHttpClient(); HttpClient httpClient = HttpClient.newHttpClient();
URI uri = URI.create(getHost() + endpoint); URI uri = URI.create(getHost() + endpoint);

View File

@ -136,14 +136,12 @@ public class OllamaAsyncResultStreamer extends Thread {
try { try {
reader.close(); reader.close();
} catch (IOException e) { } catch (IOException e) {
// Optionally log or handle
} }
} }
if (responseBodyStream != null) { if (responseBodyStream != null) {
try { try {
responseBodyStream.close(); responseBodyStream.close();
} catch (IOException e) { } catch (IOException e) {
// Optionally log or handle
} }
} }
} }

View File

@ -21,6 +21,8 @@ import lombok.Data;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
public class Tools { public class Tools {
private Tools() {}
@Data @Data
@Builder @Builder
@NoArgsConstructor @NoArgsConstructor

View File

@ -15,16 +15,23 @@ import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target; import java.lang.annotation.Target;
/** /**
* Annotates a class that calls {@link io.github.ollama4j.OllamaAPI} such that the Method * Annotation to mark a class as an Ollama tool service.
* {@link OllamaAPI#registerAnnotatedTools()} can be used to auto-register all provided classes (resp. all * <p>
* contained Methods of the provider classes annotated with {@link ToolSpec}). * When a class is annotated with {@code @OllamaToolService}, the method
* {@link OllamaAPI#registerAnnotatedTools()} can be used to automatically register all tool provider
* classes specified in the {@link #providers()} array. All methods in those provider classes that are
* annotated with {@link ToolSpec} will be registered as tools.
* </p>
*/ */
@Target(ElementType.TYPE) @Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME) @Retention(RetentionPolicy.RUNTIME)
public @interface OllamaToolService { public @interface OllamaToolService {
/** /**
* @return Classes with no-arg constructor that will be used for tool-registration. * Specifies the provider classes whose methods annotated with {@link ToolSpec} should be registered as tools.
* Each provider class must have a public no-argument constructor.
*
* @return an array of provider classes to be used for tool registration
*/ */
Class<?>[] providers(); Class<?>[] providers();
} }

View File

@ -15,21 +15,30 @@ import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target; import java.lang.annotation.Target;
/** /**
* Annotates Methods of classes that should be registered as tools by {@link OllamaAPI#registerAnnotatedTools()} * Annotation to mark a method as a tool that can be registered automatically by
* automatically. * {@link OllamaAPI#registerAnnotatedTools()}.
* <p>
* Methods annotated with {@code @ToolSpec} will be discovered and registered as tools
* when the containing class is specified as a provider in {@link OllamaToolService}.
* </p>
*/ */
@Target(ElementType.METHOD) @Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME) @Retention(RetentionPolicy.RUNTIME)
public @interface ToolSpec { public @interface ToolSpec {
/** /**
* @return tool-name that the method should be used as. Defaults to the methods name. * Specifies the name of the tool as exposed to the LLM.
* If left empty, the method's name will be used as the tool name.
*
* @return the tool name
*/ */
String name() default ""; String name() default "";
/** /**
* @return a detailed description of the method that can be interpreted by the llm, whether it should call the tool * Provides a detailed description of the tool's functionality.
* or not. * This description is used by the LLM to determine when to call the tool.
*
* @return the tool description
*/ */
String desc(); String desc();
} }

View File

@ -9,12 +9,14 @@
package io.github.ollama4j.utils; package io.github.ollama4j.utils;
import java.util.Map; import java.util.Map;
import lombok.Builder;
import lombok.Data; import lombok.Data;
/** /**
* Class for options for Ollama model. * Class for options for Ollama model.
*/ */
@Data @Data
@Builder
public class Options { public class Options {
private final Map<String, Object> optionsMap; private final Map<String, Object> optionsMap;

View File

@ -16,8 +16,8 @@ import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.exceptions.OllamaBaseException; import io.github.ollama4j.exceptions.OllamaBaseException;
import io.github.ollama4j.exceptions.RoleNotFoundException; import io.github.ollama4j.exceptions.RoleNotFoundException;
import io.github.ollama4j.models.chat.OllamaChatMessageRole; import io.github.ollama4j.models.chat.OllamaChatMessageRole;
import io.github.ollama4j.models.embeddings.OllamaEmbedRequestModel; import io.github.ollama4j.models.embed.OllamaEmbedRequestModel;
import io.github.ollama4j.models.embeddings.OllamaEmbedResponseModel; import io.github.ollama4j.models.embed.OllamaEmbedResponseModel;
import io.github.ollama4j.models.generate.OllamaGenerateRequest; import io.github.ollama4j.models.generate.OllamaGenerateRequest;
import io.github.ollama4j.models.generate.OllamaGenerateRequestBuilder; import io.github.ollama4j.models.generate.OllamaGenerateRequestBuilder;
import io.github.ollama4j.models.generate.OllamaGenerateStreamObserver; import io.github.ollama4j.models.generate.OllamaGenerateStreamObserver;

View File

@ -10,8 +10,8 @@ package io.github.ollama4j.unittests.jackson;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import io.github.ollama4j.models.embeddings.OllamaEmbedRequestBuilder; import io.github.ollama4j.models.embed.OllamaEmbedRequestBuilder;
import io.github.ollama4j.models.embeddings.OllamaEmbedRequestModel; import io.github.ollama4j.models.embed.OllamaEmbedRequestModel;
import io.github.ollama4j.utils.OptionsBuilder; import io.github.ollama4j.utils.OptionsBuilder;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;