mirror of
https://github.com/amithkoujalgi/ollama4j.git
synced 2025-10-14 01:18:58 +02:00
Refactor tool handling and add model unload support
Replaces the 'clientHandlesTools' flag with 'useTools' for tool execution control, defaulting to true. Adds support for model unloading via keep_alive parameter. Updates chat request and builder to use the new flag. Improves integration tests and documentation to reflect these changes. Fixes constructor order in OllamaGenerateStreamObserver and adds ignoreUnknown to ModelsProcessResponse.
This commit is contained in:
parent
6147c7d697
commit
cac94e0faf
@ -29,17 +29,17 @@ session. The tool invocation and response handling are all managed internally by
|
||||
|
||||
This tool calling can also be done using the streaming API.
|
||||
|
||||
### Client-managed tool calls (clientHandlesTools)
|
||||
### Client-managed tool calls (useTools)
|
||||
|
||||
By default, ollama4j automatically executes tool calls returned by the model during chat, runs the corresponding registered Java methods, and appends the tool results back into the conversation. For some applications, you may want to intercept tool calls and decide yourself when and how to execute them (for example, to queue them, to show a confirmation UI to the user, to run them in a sandbox, or to perform multi‑step orchestration).
|
||||
|
||||
To enable this behavior, set the clientHandlesTools flag to true on your OllamaAPI instance. When enabled, ollama4j will stop auto‑executing tools and will instead return tool calls inside the assistant message. You can then inspect the tool calls and execute them manually.
|
||||
To enable this behavior, set the useTools flag to false on your chat request (for example, through OllamaChatRequestBuilder). When useTools is false, ollama4j will stop auto‑executing tools and will instead return tool calls inside the assistant message. You can then inspect the tool calls and execute them manually.
|
||||
|
||||
|
||||
Notes:
|
||||
- Default value: clientHandlesTools is false for backward compatibility.
|
||||
- When clientHandlesTools is false, ollama4j auto‑executes tools and loops internally until tools are resolved or max retries is reached.
|
||||
- When clientHandlesTools is true, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then re‑calling chat() to continue.
|
||||
- Default value: useTools is true.
|
||||
- When useTools is true, ollama4j auto‑executes tools and loops internally until tools are resolved or max retries is reached.
|
||||
- When useTools is false, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then re‑calling chat() to continue.
|
||||
|
||||
### Annotation-Based Tool Registration
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Licensed under the MIT License (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
package io.github.ollama4j;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonParseException;
|
||||
@ -92,13 +92,9 @@ public class OllamaAPI {
|
||||
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
|
||||
private int numberOfRetriesForModelPull = 0;
|
||||
|
||||
/**
|
||||
* When set to true, tools will not be automatically executed by the library. Instead, tool
|
||||
* calls will be returned to the client for manual handling.
|
||||
*
|
||||
* <p>Default is false for backward compatibility.
|
||||
*/
|
||||
@Setter private boolean clientHandlesTools = false;
|
||||
@Setter
|
||||
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
|
||||
private int modelKeepAliveTime = 0;
|
||||
|
||||
/**
|
||||
* Instantiates the Ollama API with default Ollama host: <a
|
||||
@ -535,6 +531,44 @@ public class OllamaAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
If an empty prompt is provided and the keep_alive parameter is set to 0, a model will be unloaded from memory.
|
||||
*/
|
||||
public void unloadModel(String modelName)
|
||||
throws URISyntaxException, IOException, InterruptedException, OllamaBaseException {
|
||||
String url = this.host + "/api/generate";
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
Map<String, Object> jsonMap = new java.util.HashMap<>();
|
||||
jsonMap.put("model", modelName);
|
||||
jsonMap.put("keep_alive", 0);
|
||||
String jsonData = objectMapper.writeValueAsString(jsonMap);
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
.method(
|
||||
"POST",
|
||||
HttpRequest.BodyPublishers.ofString(
|
||||
jsonData, StandardCharsets.UTF_8))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
.build();
|
||||
HttpClient client = HttpClient.newHttpClient();
|
||||
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
int statusCode = response.statusCode();
|
||||
String responseBody = response.body();
|
||||
if (statusCode == 404
|
||||
&& responseBody.contains("model")
|
||||
&& responseBody.contains("not found")) {
|
||||
return;
|
||||
}
|
||||
if (statusCode != 200) {
|
||||
throw new OllamaBaseException(statusCode + " - " + responseBody);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings using a {@link OllamaEmbedRequestModel}.
|
||||
*
|
||||
@ -905,11 +939,14 @@ public class OllamaAPI {
|
||||
new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds);
|
||||
OllamaChatResult result;
|
||||
|
||||
// add all registered tools to Request
|
||||
// only add tools if tools flag is set
|
||||
if (request.isUseTools()) {
|
||||
// add all registered tools to request
|
||||
request.setTools(
|
||||
toolRegistry.getRegisteredSpecs().stream()
|
||||
.map(Tools.ToolSpecification::getToolPrompt)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
if (tokenHandler != null) {
|
||||
request.setStream(true);
|
||||
@ -918,10 +955,6 @@ public class OllamaAPI {
|
||||
result = requestCaller.callSync(request);
|
||||
}
|
||||
|
||||
if (clientHandlesTools) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// check if toolCallIsWanted
|
||||
List<OllamaChatToolCalls> toolCalls = result.getResponseModel().getMessage().getToolCalls();
|
||||
int toolCallTries = 0;
|
||||
|
@ -5,9 +5,10 @@
|
||||
* Licensed under the MIT License (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
package io.github.ollama4j.models.chat;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import io.github.ollama4j.models.request.OllamaCommonRequest;
|
||||
import io.github.ollama4j.tools.Tools;
|
||||
import io.github.ollama4j.utils.OllamaRequestBody;
|
||||
@ -32,6 +33,19 @@ public class OllamaChatRequest extends OllamaCommonRequest implements OllamaRequ
|
||||
|
||||
private boolean think;
|
||||
|
||||
@JsonProperty("keep_alive")
|
||||
private int modelKeepAliveTime;
|
||||
|
||||
/**
|
||||
* Controls whether tools are automatically executed.
|
||||
* <p>
|
||||
* If set to {@code true} (the default), tools will be automatically used/applied by the library.
|
||||
* If set to {@code false}, tool calls will be returned to the client for manual handling.
|
||||
* <p>
|
||||
* Disabling this should be an explicit operation.
|
||||
*/
|
||||
private boolean useTools = true;
|
||||
|
||||
public OllamaChatRequest() {}
|
||||
|
||||
public OllamaChatRequest(String model, boolean think, List<OllamaChatMessage> messages) {
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Licensed under the MIT License (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
package io.github.ollama4j.models.chat;
|
||||
|
||||
import io.github.ollama4j.utils.Options;
|
||||
@ -17,6 +17,8 @@ import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Setter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -28,6 +30,9 @@ public class OllamaChatRequestBuilder {
|
||||
private int imageURLConnectTimeoutSeconds = 10;
|
||||
private int imageURLReadTimeoutSeconds = 10;
|
||||
|
||||
@Setter
|
||||
private boolean useTools = true;
|
||||
|
||||
public OllamaChatRequestBuilder withImageURLConnectTimeoutSeconds(
|
||||
int imageURLConnectTimeoutSeconds) {
|
||||
this.imageURLConnectTimeoutSeconds = imageURLConnectTimeoutSeconds;
|
||||
@ -50,6 +55,7 @@ public class OllamaChatRequestBuilder {
|
||||
}
|
||||
|
||||
public OllamaChatRequest build() {
|
||||
request.setUseTools(useTools);
|
||||
return request;
|
||||
}
|
||||
|
||||
|
@ -22,8 +22,8 @@ public class OllamaGenerateStreamObserver {
|
||||
/**
 * Creates an observer that forwards streamed output to the given handlers.
 *
 * @param thinkingStreamHandler handler stored for the thinking stream
 * @param responseStreamHandler handler stored for the response stream
 */
public OllamaGenerateStreamObserver(
        OllamaGenerateTokenHandler thinkingStreamHandler,
        OllamaGenerateTokenHandler responseStreamHandler) {
    // Assign each field exactly once, in the same order as the parameters.
    this.thinkingStreamHandler = thinkingStreamHandler;
    this.responseStreamHandler = responseStreamHandler;
}
|
||||
|
||||
public void notify(OllamaGenerateResponseModel currentResponsePart) {
|
||||
|
@ -23,6 +23,7 @@ public class ModelsProcessResponse {
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public static class ModelProcess {
|
||||
@JsonProperty("name")
|
||||
private String name;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,3 +2,4 @@ USE_EXTERNAL_OLLAMA_HOST=true
|
||||
OLLAMA_HOST=http://192.168.29.229:11434/
|
||||
REQUEST_TIMEOUT_SECONDS=120
|
||||
NUMBER_RETRIES_FOR_MODEL_PULL=3
|
||||
MODEL_KEEP_ALIVE_TIME=0
|
Loading…
x
Reference in New Issue
Block a user