Refactor tool handling and add model unload support

Replaces the 'clientHandlesTools' flag with 'useTools' for tool execution control, defaulting to true. Adds support for model unloading via keep_alive parameter. Updates chat request and builder to use the new flag. Improves integration tests and documentation to reflect these changes. Fixes constructor order in OllamaGenerateStreamObserver and adds ignoreUnknown to ModelsProcessResponse.
amithkoujalgi 2025-09-20 14:46:46 +05:30
parent 6147c7d697
commit cac94e0faf
8 changed files with 849 additions and 198 deletions

View File

@@ -29,17 +29,17 @@ session. The tool invocation and response handling are all managed internally by
This tool calling can also be done using the streaming API.
### Client-managed tool calls (clientHandlesTools)
### Client-managed tool calls (useTools)
By default, ollama4j automatically executes tool calls returned by the model during chat, runs the corresponding registered Java methods, and appends the tool results back into the conversation. For some applications, you may want to intercept tool calls and decide for yourself when and how to execute them (for example, to queue them, to show a confirmation UI to the user, to run them in a sandbox, or to perform multi-step orchestration).
To enable this behavior, set the clientHandlesTools flag to true on your OllamaAPI instance. When enabled, ollama4j will stop auto-executing tools and will instead return tool calls inside the assistant message. You can then inspect the tool calls and execute them manually.
To enable this behavior, set the useTools flag to true on your OllamaAPI instance. When enabled, ollama4j will stop auto-executing tools and will instead return tool calls inside the assistant message. You can then inspect the tool calls and execute them manually.
Notes:
- Default value: clientHandlesTools is false for backward compatibility.
- When clientHandlesTools is false, ollama4j auto-executes tools and loops internally until tools are resolved or max retries is reached.
- When clientHandlesTools is true, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then calling chat() again to continue.
- Default value: useTools is true.
- When useTools is false, ollama4j auto-executes tools and loops internally until tools are resolved or max retries is reached.
- When useTools is true, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then calling chat() again to continue (see the sketch below).
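
For illustration, a minimal sketch of the manual flow (the builder setup, the exact `chat(...)` signature, and the `runMyTool` helper are assumptions and may differ in your version):

```java
// Sketch: manually handling tool calls when ollama4j is not auto-executing them.
// Assumes a configured OllamaAPI instance `ollamaAPI` and a chat request builder `builder`.
OllamaChatResult result = ollamaAPI.chat(builder.build());
List<OllamaChatToolCalls> toolCalls = result.getResponseModel().getMessage().getToolCalls();
while (toolCalls != null && !toolCalls.isEmpty()) {
    for (OllamaChatToolCalls call : toolCalls) {
        // Run (or queue, confirm, sandbox) the tool yourself.
        String toolOutput = runMyTool(call); // hypothetical helper
        // Pass the result back into the conversation as a TOOL message.
        builder.withMessage(OllamaChatMessageRole.TOOL, toolOutput);
    }
    // Call chat() again so the model can continue with the tool results.
    result = ollamaAPI.chat(builder.build());
    toolCalls = result.getResponseModel().getMessage().getToolCalls();
}
```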
### Annotation-Based Tool Registration

View File

@@ -5,7 +5,7 @@
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
*/
package io.github.ollama4j;
import com.fasterxml.jackson.core.JsonParseException;
@@ -92,13 +92,9 @@ public class OllamaAPI {
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
private int numberOfRetriesForModelPull = 0;
/**
* When set to true, tools will not be automatically executed by the library. Instead, tool
* calls will be returned to the client for manual handling.
*
* <p>Default is false for backward compatibility.
*/
@Setter private boolean clientHandlesTools = false;
@Setter
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
private int modelKeepAliveTime = 0;
/**
* Instantiates the Ollama API with default Ollama host: <a
@@ -535,6 +531,44 @@ public class OllamaAPI {
}
}
/**
 * Unloads a model from memory. Per the Ollama API, if an empty prompt is provided and the
 * {@code keep_alive} parameter is set to 0, the model will be unloaded from memory.
 *
 * @param modelName the name of the model to unload
 */
public void unloadModel(String modelName)
throws URISyntaxException, IOException, InterruptedException, OllamaBaseException {
String url = this.host + "/api/generate";
ObjectMapper objectMapper = new ObjectMapper();
Map<String, Object> jsonMap = new java.util.HashMap<>();
jsonMap.put("model", modelName);
jsonMap.put("keep_alive", 0);
String jsonData = objectMapper.writeValueAsString(jsonMap);
HttpRequest request =
getRequestBuilderDefault(new URI(url))
.method(
"POST",
HttpRequest.BodyPublishers.ofString(
jsonData, StandardCharsets.UTF_8))
.header(
Constants.HttpConstants.HEADER_KEY_ACCEPT,
Constants.HttpConstants.APPLICATION_JSON)
.header(
Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
Constants.HttpConstants.APPLICATION_JSON)
.build();
HttpClient client = HttpClient.newHttpClient();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
int statusCode = response.statusCode();
String responseBody = response.body();
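        // Treat a 404 "model not found" response as already unloaded and return silently.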
if (statusCode == 404
&& responseBody.contains("model")
&& responseBody.contains("not found")) {
return;
}
if (statusCode != 200) {
throw new OllamaBaseException(statusCode + " - " + responseBody);
}
}
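
A minimal usage sketch (host URL and model name are placeholders):

```java
OllamaAPI api = new OllamaAPI("http://localhost:11434");
// POSTs {"model": "llama3", "keep_alive": 0} to /api/generate, evicting the model;
// a 404 "model not found" response is treated as already unloaded.
api.unloadModel("llama3");
```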
/**
* Generate embeddings using a {@link OllamaEmbedRequestModel}.
*
@@ -905,11 +939,14 @@ public class OllamaAPI {
new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds);
OllamaChatResult result;
// add all registered tools to Request
request.setTools(
toolRegistry.getRegisteredSpecs().stream()
.map(Tools.ToolSpecification::getToolPrompt)
.collect(Collectors.toList()));
// only add tools if tools flag is set
if (request.isUseTools()) {
// add all registered tools to request
request.setTools(
toolRegistry.getRegisteredSpecs().stream()
.map(Tools.ToolSpecification::getToolPrompt)
.collect(Collectors.toList()));
}
if (tokenHandler != null) {
request.setStream(true);
@@ -918,10 +955,6 @@
result = requestCaller.callSync(request);
}
if (clientHandlesTools) {
return result;
}
// check if toolCallIsWanted
List<OllamaChatToolCalls> toolCalls = result.getResponseModel().getMessage().getToolCalls();
int toolCallTries = 0;

View File

@@ -5,9 +5,10 @@
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
*/
package io.github.ollama4j.models.chat;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.github.ollama4j.models.request.OllamaCommonRequest;
import io.github.ollama4j.tools.Tools;
import io.github.ollama4j.utils.OllamaRequestBody;
@@ -19,8 +20,8 @@ import lombok.Setter;
* Defines a Request to use against the ollama /api/chat endpoint.
*
* @see <a href=
* "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate
* Chat Completion</a>
* "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate
* Chat Completion</a>
*/
@Getter
@Setter
@@ -32,6 +33,19 @@ public class OllamaChatRequest extends OllamaCommonRequest implements OllamaRequ
private boolean think;
@JsonProperty("keep_alive")
private int modelKeepAliveTime;
/**
* Controls whether tools are automatically executed.
* <p>
* If set to {@code true} (the default), tools will be automatically used/applied by the library.
* If set to {@code false}, tool calls will be returned to the client for manual handling.
* <p>
* Disabling this should be an explicit operation.
*/
private boolean useTools = true;
public OllamaChatRequest() {}
public OllamaChatRequest(String model, boolean think, List<OllamaChatMessage> messages) {

View File

@@ -5,7 +5,7 @@
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
*/
package io.github.ollama4j.models.chat;
import io.github.ollama4j.utils.Options;
@@ -17,6 +17,8 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import lombok.Setter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,6 +30,9 @@ public class OllamaChatRequestBuilder {
private int imageURLConnectTimeoutSeconds = 10;
private int imageURLReadTimeoutSeconds = 10;
@Setter
private boolean useTools = true;
public OllamaChatRequestBuilder withImageURLConnectTimeoutSeconds(
int imageURLConnectTimeoutSeconds) {
this.imageURLConnectTimeoutSeconds = imageURLConnectTimeoutSeconds;
@@ -50,6 +55,7 @@
}
public OllamaChatRequest build() {
request.setUseTools(useTools);
return request;
}
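
For context, a sketch of how a caller might opt out of automatic tool handling via the builder (`getInstance(...)` and `withMessage(...)` are assumed builder methods and may differ in your version):

```java
OllamaChatRequestBuilder builder = OllamaChatRequestBuilder.getInstance("llama3");
builder.setUseTools(false); // Lombok-generated setter shown in the diff above
OllamaChatRequest request =
        builder.withMessage(OllamaChatMessageRole.USER, "What's the weather?").build();
// build() copies the builder's useTools flag onto the request.
```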

View File

@@ -22,8 +22,8 @@ public class OllamaGenerateStreamObserver {
public OllamaGenerateStreamObserver(
OllamaGenerateTokenHandler thinkingStreamHandler,
OllamaGenerateTokenHandler responseStreamHandler) {
this.responseStreamHandler = responseStreamHandler;
this.thinkingStreamHandler = thinkingStreamHandler;
this.responseStreamHandler = responseStreamHandler;
}
public void notify(OllamaGenerateResponseModel currentResponsePart) {

View File

@@ -23,6 +23,7 @@ public class ModelsProcessResponse {
@Data
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public static class ModelProcess {
@JsonProperty("name")
private String name;
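
For context, a sketch of the failure `ignoreUnknown` prevents (`psJson` is a placeholder for a raw `/api/ps` response body):

```java
// Without @JsonIgnoreProperties(ignoreUnknown = true), any field a newer Ollama
// server adds to /api/ps entries would make readValue(...) throw
// UnrecognizedPropertyException instead of deserializing the known fields.
ObjectMapper mapper = new ObjectMapper();
ModelsProcessResponse running = mapper.readValue(psJson, ModelsProcessResponse.class);
```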

View File

@@ -1,4 +1,5 @@
USE_EXTERNAL_OLLAMA_HOST=true
OLLAMA_HOST=http://192.168.29.229:11434/
REQUEST_TIMEOUT_SECONDS=120
NUMBER_RETRIES_FOR_MODEL_PULL=3
NUMBER_RETRIES_FOR_MODEL_PULL=3
MODEL_KEEP_ALIVE_TIME=0