forked from Mirror/ollama4j
Refactor tool handling and add model unload support
Replaces the 'clientHandlesTools' flag with 'useTools' for tool execution control, defaulting to true. Adds support for model unloading via keep_alive parameter. Updates chat request and builder to use the new flag. Improves integration tests and documentation to reflect these changes. Fixes constructor order in OllamaGenerateStreamObserver and adds ignoreUnknown to ModelsProcessResponse.
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
* Licensed under the MIT License (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
package io.github.ollama4j;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonParseException;
|
||||
@@ -92,13 +92,9 @@ public class OllamaAPI {
|
||||
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
|
||||
private int numberOfRetriesForModelPull = 0;
|
||||
|
||||
/**
|
||||
* When set to true, tools will not be automatically executed by the library. Instead, tool
|
||||
* calls will be returned to the client for manual handling.
|
||||
*
|
||||
* <p>Default is false for backward compatibility.
|
||||
*/
|
||||
@Setter private boolean clientHandlesTools = false;
|
||||
@Setter
|
||||
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
|
||||
private int modelKeepAliveTime = 0;
|
||||
|
||||
/**
|
||||
* Instantiates the Ollama API with default Ollama host: <a
|
||||
@@ -535,6 +531,44 @@ public class OllamaAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
If an empty prompt is provided and the keep_alive parameter is set to 0, a model will be unloaded from memory.
|
||||
*/
|
||||
public void unloadModel(String modelName)
|
||||
throws URISyntaxException, IOException, InterruptedException, OllamaBaseException {
|
||||
String url = this.host + "/api/generate";
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
Map<String, Object> jsonMap = new java.util.HashMap<>();
|
||||
jsonMap.put("model", modelName);
|
||||
jsonMap.put("keep_alive", 0);
|
||||
String jsonData = objectMapper.writeValueAsString(jsonMap);
|
||||
HttpRequest request =
|
||||
getRequestBuilderDefault(new URI(url))
|
||||
.method(
|
||||
"POST",
|
||||
HttpRequest.BodyPublishers.ofString(
|
||||
jsonData, StandardCharsets.UTF_8))
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_ACCEPT,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
.header(
|
||||
Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
|
||||
Constants.HttpConstants.APPLICATION_JSON)
|
||||
.build();
|
||||
HttpClient client = HttpClient.newHttpClient();
|
||||
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
int statusCode = response.statusCode();
|
||||
String responseBody = response.body();
|
||||
if (statusCode == 404
|
||||
&& responseBody.contains("model")
|
||||
&& responseBody.contains("not found")) {
|
||||
return;
|
||||
}
|
||||
if (statusCode != 200) {
|
||||
throw new OllamaBaseException(statusCode + " - " + responseBody);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings using a {@link OllamaEmbedRequestModel}.
|
||||
*
|
||||
@@ -905,11 +939,14 @@ public class OllamaAPI {
|
||||
new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds);
|
||||
OllamaChatResult result;
|
||||
|
||||
// add all registered tools to Request
|
||||
request.setTools(
|
||||
toolRegistry.getRegisteredSpecs().stream()
|
||||
.map(Tools.ToolSpecification::getToolPrompt)
|
||||
.collect(Collectors.toList()));
|
||||
// only add tools if tools flag is set
|
||||
if (request.isUseTools()) {
|
||||
// add all registered tools to request
|
||||
request.setTools(
|
||||
toolRegistry.getRegisteredSpecs().stream()
|
||||
.map(Tools.ToolSpecification::getToolPrompt)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
if (tokenHandler != null) {
|
||||
request.setStream(true);
|
||||
@@ -918,10 +955,6 @@ public class OllamaAPI {
|
||||
result = requestCaller.callSync(request);
|
||||
}
|
||||
|
||||
if (clientHandlesTools) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// check if toolCallIsWanted
|
||||
List<OllamaChatToolCalls> toolCalls = result.getResponseModel().getMessage().getToolCalls();
|
||||
int toolCallTries = 0;
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
* Licensed under the MIT License (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
package io.github.ollama4j.models.chat;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import io.github.ollama4j.models.request.OllamaCommonRequest;
|
||||
import io.github.ollama4j.tools.Tools;
|
||||
import io.github.ollama4j.utils.OllamaRequestBody;
|
||||
@@ -19,8 +20,8 @@ import lombok.Setter;
|
||||
* Defines a Request to use against the ollama /api/chat endpoint.
|
||||
*
|
||||
* @see <a href=
|
||||
* "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate
|
||||
* Chat Completion</a>
|
||||
* "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate
|
||||
* Chat Completion</a>
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@@ -32,6 +33,19 @@ public class OllamaChatRequest extends OllamaCommonRequest implements OllamaRequ
|
||||
|
||||
private boolean think;
|
||||
|
||||
@JsonProperty("keep_alive")
|
||||
private int modelKeepAliveTime;
|
||||
|
||||
/**
|
||||
* Controls whether tools are automatically executed.
|
||||
* <p>
|
||||
* If set to {@code true} (the default), tools will be automatically used/applied by the library.
|
||||
* If set to {@code false}, tool calls will be returned to the client for manual handling.
|
||||
* <p>
|
||||
* Disabling this should be an explicit operation.
|
||||
*/
|
||||
private boolean useTools = true;
|
||||
|
||||
public OllamaChatRequest() {}
|
||||
|
||||
public OllamaChatRequest(String model, boolean think, List<OllamaChatMessage> messages) {
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* Licensed under the MIT License (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
package io.github.ollama4j.models.chat;
|
||||
|
||||
import io.github.ollama4j.utils.Options;
|
||||
@@ -17,6 +17,8 @@ import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Setter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -28,6 +30,9 @@ public class OllamaChatRequestBuilder {
|
||||
private int imageURLConnectTimeoutSeconds = 10;
|
||||
private int imageURLReadTimeoutSeconds = 10;
|
||||
|
||||
@Setter
|
||||
private boolean useTools = true;
|
||||
|
||||
public OllamaChatRequestBuilder withImageURLConnectTimeoutSeconds(
|
||||
int imageURLConnectTimeoutSeconds) {
|
||||
this.imageURLConnectTimeoutSeconds = imageURLConnectTimeoutSeconds;
|
||||
@@ -50,6 +55,7 @@ public class OllamaChatRequestBuilder {
|
||||
}
|
||||
|
||||
public OllamaChatRequest build() {
|
||||
request.setUseTools(useTools);
|
||||
return request;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ public class OllamaGenerateStreamObserver {
|
||||
/**
 * Creates an observer that keeps the two supplied token handlers.
 *
 * <p>Each handler is stored exactly once, in the same order as the parameters.
 *
 * @param thinkingStreamHandler handler stored in {@code thinkingStreamHandler}
 * @param responseStreamHandler handler stored in {@code responseStreamHandler}
 */
public OllamaGenerateStreamObserver(
        OllamaGenerateTokenHandler thinkingStreamHandler,
        OllamaGenerateTokenHandler responseStreamHandler) {
    this.thinkingStreamHandler = thinkingStreamHandler;
    this.responseStreamHandler = responseStreamHandler;
}
|
||||
|
||||
public void notify(OllamaGenerateResponseModel currentResponsePart) {
|
||||
|
||||
@@ -23,6 +23,7 @@ public class ModelsProcessResponse {
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public static class ModelProcess {
|
||||
@JsonProperty("name")
|
||||
private String name;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
USE_EXTERNAL_OLLAMA_HOST=true
|
||||
OLLAMA_HOST=http://192.168.29.229:11434/
|
||||
REQUEST_TIMEOUT_SECONDS=120
|
||||
NUMBER_RETRIES_FOR_MODEL_PULL=3
|
||||
NUMBER_RETRIES_FOR_MODEL_PULL=3
|
||||
MODEL_KEEP_ALIVE_TIME=0
|
||||
Reference in New Issue
Block a user