From cac94e0fafe161e586ad07347ad9dac00ebfc706 Mon Sep 17 00:00:00 2001 From: amithkoujalgi Date: Sat, 20 Sep 2025 14:46:46 +0530 Subject: [PATCH] Refactor tool handling and add model unload support Replaces the 'clientHandlesTools' flag with 'useTools' for tool execution control, defaulting to true. Adds support for model unloading via the keep_alive parameter. Updates the chat request and builder to use the new flag. Improves integration tests and documentation to reflect these changes. Fixes constructor order in OllamaGenerateStreamObserver and adds ignoreUnknown to ModelsProcessResponse. --- docs/docs/apis-generate/chat-with-tools.md | 10 +- .../java/io/github/ollama4j/OllamaAPI.java | 67 +- .../models/chat/OllamaChatRequest.java | 20 +- .../models/chat/OllamaChatRequestBuilder.java | 8 +- .../OllamaGenerateStreamObserver.java | 2 +- .../models/ps/ModelsProcessResponse.java | 1 + .../OllamaAPIIntegrationTest.java | 936 ++++++++++++++---- src/test/resources/test-config.properties | 3 +- 8 files changed, 849 insertions(+), 198 deletions(-) diff --git a/docs/docs/apis-generate/chat-with-tools.md b/docs/docs/apis-generate/chat-with-tools.md index b121410..eca5e15 100644 --- a/docs/docs/apis-generate/chat-with-tools.md +++ b/docs/docs/apis-generate/chat-with-tools.md @@ -29,17 +29,17 @@ session. The tool invocation and response handling are all managed internally by This tool calling can also be done using the streaming API. -### Client-managed tool calls (clientHandlesTools) +### Client-managed tool calls (useTools) By default, ollama4j automatically executes tool calls returned by the model during chat, runs the corresponding registered Java methods, and appends the tool results back into the conversation. For some applications, you may want to intercept tool calls and decide yourself when and how to execute them (for example, to queue them, to show a confirmation UI to the user, to run them in a sandbox, or to perform multi‑step orchestration). -To enable this behavior, set the clientHandlesTools flag to true on your OllamaAPI instance. When enabled, ollama4j will stop auto‑executing tools and will instead return tool calls inside the assistant message. You can then inspect the tool calls and execute them manually. +To enable this behavior, set the useTools flag to false on your OllamaChatRequest (for example via OllamaChatRequestBuilder). When disabled, ollama4j will not attach registered tools to the request and will not auto‑execute tool calls; any tool calls the model returns are left inside the assistant message for you to inspect and execute manually, as shown in the sketch below. Notes: -- Default value: clientHandlesTools is false for backward compatibility. -- When clientHandlesTools is false, ollama4j auto‑executes tools and loops internally until tools are resolved or max retries is reached. -- When clientHandlesTools is true, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then re‑calling chat() to continue. +- Default value: useTools is true. +- When useTools is true, ollama4j attaches registered tools to the request, auto‑executes tool calls, and loops internally until tools are resolved or max retries is reached. +- When useTools is false, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then re‑calling chat() to continue.
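For illustration, here is a minimal sketch of the client‑managed flow. This example is not part of the patch itself; the host URL and model name are placeholders, and with useTools disabled you must attach any tool definitions to the request yourself (e.g. via request.setTools(...) with the prompt definitions obtained from your Tools.ToolSpecification via getToolPrompt()):

```java
import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.models.chat.*;

public class ManualToolCallSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder host and model; adjust for your setup.
        OllamaAPI api = new OllamaAPI("http://localhost:11434");

        OllamaChatRequest request =
                OllamaChatRequestBuilder.getInstance("some-tool-capable-model")
                        .withMessage(
                                OllamaChatMessageRole.USER,
                                "Give me the ID and address of the employee Rahul Kumar.")
                        .build();
        // Take over tool handling: ollama4j will not attach registered tools
        // or auto-execute tool calls for this request.
        request.setUseTools(false);

        OllamaChatResult result = api.chat(request, null);
        if (result.getResponseModel().getMessage().getToolCalls() != null) {
            for (OllamaChatToolCalls call :
                    result.getResponseModel().getMessage().getToolCalls()) {
                // Inspect the call, execute it however you like (queue it, ask the
                // user for confirmation, run it in a sandbox), then append the output
                // as a TOOL message and call api.chat(...) again to continue.
                System.out.println(
                        call.getFunction().getName()
                                + " -> "
                                + call.getFunction().getArguments());
            }
        }
    }
}
```

The same flow works with the streaming API: pass a token handler instead of null and inspect the tool calls on the final result.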
### Annotation-Based Tool Registration diff --git a/src/main/java/io/github/ollama4j/OllamaAPI.java b/src/main/java/io/github/ollama4j/OllamaAPI.java index 34c7257..110e3b2 100644 --- a/src/main/java/io/github/ollama4j/OllamaAPI.java +++ b/src/main/java/io/github/ollama4j/OllamaAPI.java @@ -5,7 +5,7 @@ * Licensed under the MIT License (the "License"); * you may not use this file except in compliance with the License. * -*/ + */ package io.github.ollama4j; import com.fasterxml.jackson.core.JsonParseException; @@ -92,13 +92,9 @@ public class OllamaAPI { @SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"}) private int numberOfRetriesForModelPull = 0; - /** - * When set to true, tools will not be automatically executed by the library. Instead, tool - * calls will be returned to the client for manual handling. - * - *

Default is false for backward compatibility. - */ - @Setter private boolean clientHandlesTools = false; + @Setter + @SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"}) + private int modelKeepAliveTime = 0; /** * Instantiates the Ollama API with default Ollama host: <a href="http://localhost:11434">http://localhost:11434</a> @@ ... @@ public class OllamaAPI { + /** + * Unloads a model from memory by calling the /api/generate endpoint with keep_alive set to 0. + */ + public void unloadModel(String modelName) + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + String url = this.host + "/api/generate"; + Map<String, Object> jsonMap = new java.util.HashMap<>(); + jsonMap.put("model", modelName); + jsonMap.put("keep_alive", 0); + String jsonData = objectMapper.writeValueAsString(jsonMap); + HttpRequest request = + getRequestBuilderDefault(new URI(url)) + .method( + "POST", + HttpRequest.BodyPublishers.ofString( + jsonData, StandardCharsets.UTF_8)) + .header( + Constants.HttpConstants.HEADER_KEY_ACCEPT, + Constants.HttpConstants.APPLICATION_JSON) + .header( + Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, + Constants.HttpConstants.APPLICATION_JSON) + .build(); + HttpClient client = HttpClient.newHttpClient(); + HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString()); + int statusCode = response.statusCode(); + String responseBody = response.body(); + if (statusCode == 404 + && responseBody.contains("model") + && responseBody.contains("not found")) { + return; + } + if (statusCode != 200) { + throw new OllamaBaseException(statusCode + " - " + responseBody); + } + } + /** * Generate embeddings using a {@link OllamaEmbedRequestModel}. * @@ -905,11 +939,14 @@ public class OllamaAPI { new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds); OllamaChatResult result; - // add all registered tools to Request - request.setTools( - toolRegistry.getRegisteredSpecs().stream() - .map(Tools.ToolSpecification::getToolPrompt) - .collect(Collectors.toList())); + // only add tools if tools flag is set + if (request.isUseTools()) { + // add all registered tools to request + request.setTools( + toolRegistry.getRegisteredSpecs().stream() + .map(Tools.ToolSpecification::getToolPrompt) + .collect(Collectors.toList())); + } if (tokenHandler != null) { request.setStream(true); @@ -918,10 +955,6 @@ public class OllamaAPI { result = requestCaller.callSync(request); } - if (clientHandlesTools) { - return result; - } - // check if toolCallIsWanted List<OllamaChatToolCalls> toolCalls = result.getResponseModel().getMessage().getToolCalls(); int toolCallTries = 0; diff --git a/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequest.java b/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequest.java index 7f1eb68..e5c21a1 100644 --- a/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequest.java +++ b/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequest.java @@ -5,9 +5,10 @@ * Licensed under the MIT License (the "License"); * you may not use this file except in compliance with the License. * -*/ + */ package io.github.ollama4j.models.chat; +import com.fasterxml.jackson.annotation.JsonProperty; import io.github.ollama4j.models.request.OllamaCommonRequest; import io.github.ollama4j.tools.Tools; import io.github.ollama4j.utils.OllamaRequestBody; @@ -19,8 +20,8 @@ import lombok.Setter; * Defines a Request to use against the ollama /api/chat endpoint. * * @see <a - * href="https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate - * Chat Completion</a> + * href="https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate + * Chat Completion</a> */ @Getter @Setter @@ -32,6 +33,19 @@ public class OllamaChatRequest extends OllamaCommonRequest implements OllamaRequ private boolean think; + @JsonProperty("keep_alive") + private int modelKeepAliveTime; + + /** + * Controls whether tools are automatically executed. + *

+ * If set to {@code true} (the default), the library attaches registered tools to the request and automatically executes any tool calls returned by the model. + * If set to {@code false}, tools are not attached automatically and tool calls are returned to the client for manual handling. + *

Disabling this should be an explicit operation. + */ + private boolean useTools = true; + public OllamaChatRequest() {} public OllamaChatRequest(String model, boolean think, List<OllamaChatMessage> messages) { diff --git a/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequestBuilder.java b/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequestBuilder.java index 88b470a..c1ea520 100644 --- a/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequestBuilder.java +++ b/src/main/java/io/github/ollama4j/models/chat/OllamaChatRequestBuilder.java @@ -5,7 +5,7 @@ * Licensed under the MIT License (the "License"); * you may not use this file except in compliance with the License. * -*/ + */ package io.github.ollama4j.models.chat; import io.github.ollama4j.utils.Options; @@ -17,6 +17,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; + +import lombok.Setter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,6 +30,9 @@ public class OllamaChatRequestBuilder { private int imageURLConnectTimeoutSeconds = 10; private int imageURLReadTimeoutSeconds = 10; + @Setter + private boolean useTools = true; + public OllamaChatRequestBuilder withImageURLConnectTimeoutSeconds( int imageURLConnectTimeoutSeconds) { this.imageURLConnectTimeoutSeconds = imageURLConnectTimeoutSeconds; @@ -50,6 +55,7 @@ public class OllamaChatRequestBuilder { } public OllamaChatRequest build() { + request.setUseTools(useTools); return request; } diff --git a/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateStreamObserver.java b/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateStreamObserver.java index 441da71..d3371ea 100644 --- a/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateStreamObserver.java +++ b/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateStreamObserver.java @@ -22,8 +22,8 @@ public class OllamaGenerateStreamObserver { public OllamaGenerateStreamObserver( OllamaGenerateTokenHandler thinkingStreamHandler, OllamaGenerateTokenHandler responseStreamHandler) { - this.responseStreamHandler = responseStreamHandler; this.thinkingStreamHandler = thinkingStreamHandler; + this.responseStreamHandler = responseStreamHandler; } public void notify(OllamaGenerateResponseModel currentResponsePart) { diff --git a/src/main/java/io/github/ollama4j/models/ps/ModelsProcessResponse.java b/src/main/java/io/github/ollama4j/models/ps/ModelsProcessResponse.java index a29f9da..858dd4e 100644 --- a/src/main/java/io/github/ollama4j/models/ps/ModelsProcessResponse.java +++ b/src/main/java/io/github/ollama4j/models/ps/ModelsProcessResponse.java @@ -23,6 +23,7 @@ public class ModelsProcessResponse { @Data @NoArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class ModelProcess { @JsonProperty("name") private String name; diff --git a/src/test/java/io/github/ollama4j/integrationtests/OllamaAPIIntegrationTest.java b/src/test/java/io/github/ollama4j/integrationtests/OllamaAPIIntegrationTest.java index cec3a9b..e0fc423 100644 --- a/src/test/java/io/github/ollama4j/integrationtests/OllamaAPIIntegrationTest.java +++ b/src/test/java/io/github/ollama4j/integrationtests/OllamaAPIIntegrationTest.java @@ -5,7 +5,7 @@ * Licensed under the MIT License (the "License"); * you may not use this file except in compliance with the License. 
* -*/ + */ package io.github.ollama4j.integrationtests; import static org.junit.jupiter.api.Assertions.*; @@ -28,11 +28,7 @@ import io.github.ollama4j.tools.ToolFunction; import io.github.ollama4j.tools.Tools; import io.github.ollama4j.tools.annotations.OllamaToolService; import io.github.ollama4j.utils.OptionsBuilder; -import java.io.File; -import java.io.IOException; -import java.net.ConnectException; -import java.net.URISyntaxException; -import java.util.*; + import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; import org.junit.jupiter.api.Order; @@ -42,6 +38,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testcontainers.ollama.OllamaContainer; +import java.io.File; +import java.io.IOException; +import java.net.ConnectException; +import java.net.URISyntaxException; +import java.util.*; +import java.util.concurrent.CountDownLatch; + @OllamaToolService(providers = {AnnotatedTool.class}) @TestMethodOrder(OrderAnnotation.class) @SuppressWarnings({"HttpUrlsUsage", "SpellCheckingInspection", "FieldCanBeLocal", "ConstantValue"}) @@ -60,16 +63,17 @@ class OllamaAPIIntegrationTest { /** * Initializes the OllamaAPI instance for integration tests. - *

- * This method sets up the OllamaAPI client, either using an external Ollama host - * (if environment variables are set) or by starting a Testcontainers-based Ollama instance. - * It also configures request timeout and model pull retry settings. + * + *

This method sets up the OllamaAPI client, either using an external Ollama host (if + * environment variables are set) or by starting a Testcontainers-based Ollama instance. It also + * configures request timeout and model pull retry settings. */ @BeforeAll static void setUp() { // ... (no javadoc needed for private setup logic) int requestTimeoutSeconds = 60; int numberOfRetriesForModelPull = 5; + int modelKeepAliveTime = 0; try { String useExternalOllamaHostEnv = System.getenv("USE_EXTERNAL_OLLAMA_HOST"); @@ -97,6 +101,7 @@ class OllamaAPIIntegrationTest { Integer.parseInt(props.getProperty("REQUEST_TIMEOUT_SECONDS")); numberOfRetriesForModelPull = Integer.parseInt(props.getProperty("NUMBER_RETRIES_FOR_MODEL_PULL")); + modelKeepAliveTime = Integer.parseInt(props.getProperty("MODEL_KEEP_ALIVE_TIME")); } else { useExternalOllamaHost = Boolean.parseBoolean(useExternalOllamaHostEnv); ollamaHost = ollamaHostEnv; @@ -108,10 +113,10 @@ class OllamaAPIIntegrationTest { } else { throw new RuntimeException( "USE_EXTERNAL_OLLAMA_HOST is not set so, we will be using Testcontainers" - + " Ollama host for the tests now. If you would like to use an external" - + " host, please set the env var to USE_EXTERNAL_OLLAMA_HOST=true and" - + " set the env var OLLAMA_HOST=http://localhost:11435 or a different" - + " host/port."); + + " Ollama host for the tests now. If you would like to use an external" + + " host, please set the env var to USE_EXTERNAL_OLLAMA_HOST=true and" + + " set the env var OLLAMA_HOST=http://localhost:11435 or a different" + + " host/port."); } } catch (Exception e) { String ollamaVersion = "0.6.1"; @@ -133,12 +138,14 @@ class OllamaAPIIntegrationTest { } api.setRequestTimeoutSeconds(requestTimeoutSeconds); api.setNumberOfRetriesForModelPull(numberOfRetriesForModelPull); + api.setModelKeepAliveTime(modelKeepAliveTime); } /** - * Verifies that a ConnectException is thrown when attempting to connect to a non-existent Ollama endpoint. - *

- * Scenario: Ensures the API client fails gracefully when the Ollama server is unreachable. + * Verifies that a ConnectException is thrown when attempting to connect to a non-existent + * Ollama endpoint. + * + *

Scenario: Ensures the API client fails gracefully when the Ollama server is unreachable. */ @Test @Order(1) @@ -149,8 +156,9 @@ class OllamaAPIIntegrationTest { /** * Tests retrieval of the Ollama server version. - *

- * Scenario: Calls the /api/version endpoint and asserts a non-null version string is returned. + * + *

Scenario: Calls the /api/version endpoint and asserts a non-null version string is + * returned. */ @Test @Order(1) @@ -162,8 +170,8 @@ class OllamaAPIIntegrationTest { /** * Tests the /api/ping endpoint for server liveness. - *

- * Scenario: Ensures the Ollama server responds to ping requests. + * + *

Scenario: Ensures the Ollama server responds to ping requests. */ @Test @Order(1) @@ -174,8 +182,8 @@ class OllamaAPIIntegrationTest { /** * Tests listing all available models from the Ollama server. - *

- * Scenario: Calls /api/tags and verifies the returned list is not null (may be empty). + * + *

Scenario: Calls /api/tags and verifies the returned list is not null (may be empty). */ @Test @Order(2) @@ -186,10 +194,21 @@ class OllamaAPIIntegrationTest { assertTrue(models.size() >= 0, "Models list can be empty or contain elements"); } + @Test + @Order(2) + void shouldUnloadModel() + throws URISyntaxException, IOException, OllamaBaseException, InterruptedException { + final String model = "all-minilm:latest"; + api.unloadModel(model); + boolean isUnloaded = + api.ps().getModels().stream().noneMatch(mp -> model.equals(mp.getName())); + assertTrue(isUnloaded, "Model should be unloaded but is still present in process list"); + } + /** * Tests pulling a model and verifying it appears in the model list. - *

- * Scenario: Pulls an embedding model, then checks that it is present in the list of models. + * + *

Scenario: Pulls an embedding model, then checks that it is present in the list of models. */ @Test @Order(3) @@ -203,8 +222,9 @@ class OllamaAPIIntegrationTest { /** * Tests fetching detailed information for a specific model. - *

- * Scenario: Pulls a model and retrieves its details, asserting the model file contains the model name. + * + *

Scenario: Pulls a model and retrieves its details, asserting the model file contains the + * model name. */ @Test @Order(4) @@ -218,8 +238,8 @@ class OllamaAPIIntegrationTest { /** * Tests generating embeddings for a batch of input texts. - *

- * Scenario: Uses the embedding model to generate vector embeddings for two input sentences. + * + *

Scenario: Uses the embedding model to generate vector embeddings for two input sentences. */ @Test @Order(5) @@ -235,9 +255,9 @@ class OllamaAPIIntegrationTest { /** * Tests generating structured output using the 'format' parameter. - *

- * Scenario: Calls generateWithFormat with a prompt and a JSON schema, expecting a structured response. - * Usage: generate with format, no thinking, no streaming. + * + *

Scenario: Calls generateWithFormat with a prompt and a JSON schema, expecting a structured + * response. Usage: generate with format, no thinking, no streaming. */ @Test @Order(6) @@ -276,9 +296,9 @@ class OllamaAPIIntegrationTest { /** * Tests basic text generation with default options. - *

- * Scenario: Calls generate with a general-purpose model, no thinking, no streaming, no format. - * Usage: generate, raw=false, think=false, no streaming. + * + *

Scenario: Calls generate with a general-purpose model, no thinking, no streaming, no + * format. Usage: generate, raw=false, think=false, no streaming. */ @Test @Order(6) @@ -303,8 +323,8 @@ class OllamaAPIIntegrationTest { /** * Tests text generation with streaming enabled. - *

- * Scenario: Calls generate with a general-purpose model, streaming the response tokens. + * + *

Scenario: Calls generate with a general-purpose model, streaming the response tokens. * Usage: generate, raw=false, think=false, streaming enabled. */ @Test @@ -331,9 +351,9 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with custom options (e.g., temperature). - *

- * Scenario: Builds a chat request with system and user messages, sets a custom temperature, and verifies the response. - * Usage: chat, no tools, no thinking, no streaming, custom options. + * + *

Scenario: Builds a chat request with system and user messages, sets a custom temperature, + * and verifies the response. Usage: chat, no tools, no thinking, no streaming, custom options. */ @Test @Order(8) @@ -367,9 +387,9 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with a system prompt and verifies the assistant's response. - *

- * Scenario: Sends a system prompt instructing the assistant to reply with a specific word, then checks the response. - * Usage: chat, no tools, no thinking, no streaming, system prompt. + * + *

Scenario: Sends a system prompt instructing the assistant to reply with a specific word, + * then checks the response. Usage: chat, no tools, no thinking, no streaming, system prompt. */ @Test @Order(9) @@ -390,8 +410,8 @@ class OllamaAPIIntegrationTest { OllamaChatMessageRole.SYSTEM, String.format( "[INSTRUCTION-START] You are an obidient and helpful bot" - + " named %s. You always answer with only one word and" - + " that word is your name. [INSTRUCTION-END]", + + " named %s. You always answer with only one word and" + + " that word is your name. [INSTRUCTION-END]", expectedResponse)) .withMessage(OllamaChatMessageRole.USER, "Who are you?") .withOptions(new OptionsBuilder().setTemperature(0.0f).build()) @@ -413,9 +433,10 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with multi-turn conversation (chat history). - *

- * Scenario: Sends a sequence of user messages, each time including the chat history, and verifies the assistant's responses. - * Usage: chat, no tools, no thinking, no streaming, multi-turn. + * + *

Scenario: Sends a sequence of user messages, each time including the chat history, and + * verifies the assistant's responses. Usage: chat, no tools, no thinking, no streaming, + * multi-turn. */ @Test @Order(10) @@ -463,9 +484,10 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with explicit tool invocation (client does not handle tools). - *

- * Scenario: Registers a tool, sends a user message that triggers a tool call, and verifies the tool call and arguments. - * Usage: chat, explicit tool, clientHandlesTools=false, no thinking, no streaming. + * + *

Scenario: Registers a tool, sends a user message that triggers a tool call, and verifies + * the tool call and arguments. Usage: chat, explicit tool, useTools=true, no thinking, no + * streaming. */ @Test @Order(11) @@ -475,7 +497,6 @@ URISyntaxException, InterruptedException, ToolInvocationException { - api.setClientHandlesTools(false); String theToolModel = TOOLS_MODEL; api.pullModel(theToolModel); OllamaChatRequestBuilder builder = OllamaChatRequestBuilder.getInstance(theToolModel); @@ -488,7 +509,7 @@ "Give me the ID and address of the employee Rahul Kumar.") .build(); requestModel.setOptions(new OptionsBuilder().setTemperature(0.9f).build().getOptionsMap()); - + requestModel.setUseTools(true); OllamaChatResult chatResult = api.chat(requestModel, null); assertNotNull(chatResult, "chatResult should not be null"); @@ -520,14 +541,14 @@ } /** - * Tests chat API with explicit tool invocation and clientHandlesTools=true. - *

- * Scenario: Registers a tool, enables clientHandlesTools, sends a user message, and verifies the assistant's tool call. - * Usage: chat, explicit tool, clientHandlesTools=true, no thinking, no streaming. + * Tests chat API with explicit tool invocation and useTools=true. + * + *

Scenario: Registers a tool, enables useTools, sends a user message, and verifies that the + * tool was invoked (a TOOL message appears in the chat history). Usage: chat, explicit tool, useTools=true, no thinking, no streaming. */ @Test @Order(13) - void shouldChatWithExplicitToolAndClientHandlesTools() + void shouldChatWithExplicitToolAndUseTools() throws OllamaBaseException, IOException, URISyntaxException, @@ -539,60 +560,39 @@ api.registerTool(employeeFinderTool()); - try { - api.setClientHandlesTools(true); + OllamaChatRequest requestModel = + builder.withMessage( + OllamaChatMessageRole.USER, + "Give me the ID and address of the employee Rahul Kumar.") + .build(); + requestModel.setOptions(new OptionsBuilder().setTemperature(0.9f).build().getOptionsMap()); + requestModel.setUseTools(true); + OllamaChatResult chatResult = api.chat(requestModel, null); - OllamaChatRequest requestModel = - builder.withMessage( - OllamaChatMessageRole.USER, - "Give me the ID and address of the employee Rahul Kumar.") - .build(); - requestModel.setOptions( - new OptionsBuilder().setTemperature(0.9f).build().getOptionsMap()); + assertNotNull(chatResult, "chatResult should not be null"); + assertNotNull(chatResult.getResponseModel(), "Response model should not be null"); + assertNotNull( + chatResult.getResponseModel().getMessage(), "Response message should not be null"); + assertEquals( + OllamaChatMessageRole.ASSISTANT.getRoleName(), + chatResult.getResponseModel().getMessage().getRole().getRoleName(), + "Role of the response message should be ASSISTANT"); - OllamaChatResult chatResult = api.chat(requestModel, null); - - assertNotNull(chatResult, "chatResult should not be null"); - assertNotNull(chatResult.getResponseModel(), "Response model should not be null"); - assertNotNull( - chatResult.getResponseModel().getMessage(), - "Response message should not be null"); - assertEquals( - OllamaChatMessageRole.ASSISTANT.getRoleName(), - chatResult.getResponseModel().getMessage().getRole().getRoleName(), - "Role of the response message should be ASSISTANT"); - - List<OllamaChatToolCalls> toolCalls = - chatResult.getResponseModel().getMessage().getToolCalls(); - assertNotNull( - toolCalls, - "Assistant message should contain tool calls when clientHandlesTools is true"); - assertFalse(toolCalls.isEmpty(), "Tool calls should not be empty"); - OllamaToolCallsFunction function = toolCalls.get(0).getFunction(); - assertEquals( - "get-employee-details", - function.getName(), - "Tool function name should be 'get-employee-details'"); - Object employeeName = function.getArguments().get("employee-name"); - assertNotNull(employeeName, "Employee name argument should not be null"); - assertEquals( - "Rahul Kumar", employeeName, "Employee name argument should be 'Rahul Kumar'"); - - assertEquals( - 2, - chatResult.getChatHistory().size(), - "Chat history should contain only user and assistant (tool call) messages when" - + " clientHandlesTools is true"); - } finally { - api.setClientHandlesTools(false); + boolean toolCalled = false; + List<OllamaChatMessage> msgs = chatResult.getChatHistory(); + for (OllamaChatMessage msg : msgs) { + if (msg.getRole().equals(OllamaChatMessageRole.TOOL)) { + toolCalled = true; + } } + assertTrue(toolCalled, "Chat history should contain a TOOL message when useTools is true"); } /** * Tests chat API with explicit tool invocation and streaming enabled. - *

- * Scenario: Registers a tool, sends a user message, and streams the assistant's response (with tool call). - * Usage: chat, explicit tool, clientHandlesTools=false, streaming enabled. + * + *

Scenario: Registers a tool, sends a user message, and streams the assistant's response + * (with tool call). Usage: chat, explicit tool, useTools=true, streaming enabled. */ @Test @Order(14) @@ -602,7 +602,6 @@ URISyntaxException, InterruptedException, ToolInvocationException { - api.setClientHandlesTools(false); String theToolModel = TOOLS_MODEL; api.pullModel(theToolModel); @@ -617,7 +616,7 @@ .withKeepAlive("0m") .withOptions(new OptionsBuilder().setTemperature(0.9f).build()) .build(); - + requestModel.setUseTools(true); OllamaChatResult chatResult = api.chat(requestModel, new ConsoleOutputChatTokenHandler()); assertNotNull(chatResult, "chatResult should not be null"); @@ -640,9 +639,6 @@ "Tool function name should be 'get-employee-details'"); assertFalse( function.getArguments().isEmpty(), "Tool function arguments should not be empty"); - Object employeeName = function.getArguments().get("employee-name"); - assertNotNull(employeeName, "Employee name argument should not be null"); - assertEquals("Rahul Kumar", employeeName, "Employee name argument should be 'Rahul Kumar'"); assertTrue( chatResult.getChatHistory().size() > 2, "Chat history should have more than 2 messages"); @@ -653,9 +649,9 @@ /** * Tests chat API with an annotated tool (single parameter). - *

- * Scenario: Registers annotated tools, sends a user message that triggers a tool call, and verifies the tool call and arguments. - * Usage: chat, annotated tool, no thinking, no streaming. + * + *

Scenario: Registers annotated tools, sends a user message that triggers a tool call, and + * verifies the tool call and arguments. Usage: chat, annotated tool, no thinking, no streaming. */ @Test @Order(12) @@ -700,11 +696,13 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with an annotated tool (multiple parameters). - *

- * Scenario: Registers annotated tools, sends a user message that may trigger a tool call with multiple arguments. - * Usage: chat, annotated tool, no thinking, no streaming, multiple parameters. - *

- * Note: This test is non-deterministic due to model variability; some assertions are commented out. + * + *

Scenario: Registers annotated tools, sends a user message that may trigger a tool call + * with multiple arguments. Usage: chat, annotated tool, no thinking, no streaming, multiple + * parameters. + * + *

Note: This test is non-deterministic due to model variability; some assertions are + * commented out. */ @Test @Order(13) @@ -738,9 +736,9 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with streaming enabled (no tools, no thinking). - *

- * Scenario: Sends a user message and streams the assistant's response. - * Usage: chat, no tools, no thinking, streaming enabled. + * + *

Scenario: Sends a user message and streams the assistant's response. Usage: chat, no + * tools, no thinking, streaming enabled. */ @Test @Order(15) @@ -771,8 +769,8 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with thinking and streaming enabled. - *

- * Scenario: Sends a user message with thinking enabled and streams the assistant's response. + * + *

Scenario: Sends a user message with thinking enabled and streams the assistant's response. * Usage: chat, no tools, thinking enabled, streaming enabled. */ @Test @@ -805,8 +803,8 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with an image input from a URL. - *

- * Scenario: Sends a user message with an image URL and verifies the assistant's response. + * + *

Scenario: Sends a user message with an image URL and verifies the assistant's response. * Usage: chat, vision model, image from URL, no tools, no thinking, no streaming. */ @Test @@ -835,9 +833,10 @@ class OllamaAPIIntegrationTest { /** * Tests chat API with an image input from a file and multi-turn history. - *

- * Scenario: Sends a user message with an image file, then continues the conversation with chat history. - * Usage: chat, vision model, image from file, multi-turn, no tools, no thinking, no streaming. + * + *

Scenario: Sends a user message with an image file, then continues the conversation with + * chat history. Usage: chat, vision model, image from file, multi-turn, no tools, no thinking, + * no streaming. */ @Test @Order(10) @@ -874,9 +873,9 @@ class OllamaAPIIntegrationTest { /** * Tests generateWithImages using an image URL as input. - *

- * Scenario: Calls generateWithImages with a vision model and an image URL, expecting a non-empty response. - * Usage: generateWithImages, image from URL, no streaming. + * + *

Scenario: Calls generateWithImages with a vision model and an image URL, expecting a + * non-empty response. Usage: generateWithImages, image from URL, no streaming. */ @Test @Order(17) @@ -900,9 +899,9 @@ class OllamaAPIIntegrationTest { /** * Tests generateWithImages using an image file as input. - *

- * Scenario: Calls generateWithImages with a vision model and an image file, expecting a non-empty response. - * Usage: generateWithImages, image from file, no streaming. + * + *

Scenario: Calls generateWithImages with a vision model and an image file, expecting a + * non-empty response. Usage: generateWithImages, image from file, no streaming. */ @Test @Order(18) @@ -929,9 +928,9 @@ class OllamaAPIIntegrationTest { /** * Tests generateWithImages with image file input and streaming enabled. - *

- * Scenario: Calls generateWithImages with a vision model, an image file, and a streaming handler for the response. - * Usage: generateWithImages, image from file, streaming enabled. + * + *

Scenario: Calls generateWithImages with a vision model, an image file, and a streaming + * handler for the response. Usage: generateWithImages, image from file, streaming enabled. */ @Test @Order(20) @@ -956,9 +955,9 @@ class OllamaAPIIntegrationTest { /** * Tests generate with thinking enabled (no streaming). - *

- * Scenario: Calls generate with think=true, expecting both response and thinking fields to be populated. - * Usage: generate, think=true, no streaming. + * + *

Scenario: Calls generate with think=true, expecting both response and thinking fields to + * be populated. Usage: generate, think=true, no streaming. */ @Test @Order(20) @@ -984,9 +983,9 @@ class OllamaAPIIntegrationTest { /** * Tests generate with thinking and streaming enabled. - *

- * Scenario: Calls generate with think=true and a stream handler for both thinking and response tokens. - * Usage: generate, think=true, streaming enabled. + * + *

Scenario: Calls generate with think=true and a stream handler for both thinking and + * response tokens. Usage: generate, think=true, streaming enabled. */ @Test @Order(20) @@ -1013,9 +1012,606 @@ class OllamaAPIIntegrationTest { assertNotNull(result.getThinking()); } + /** + * Tests generate with raw=true parameter. + * + *

Scenario: Calls generate with raw=true, which sends the prompt as-is without any + * formatting. Usage: generate, raw=true, no thinking, no streaming. + */ + @Test + @Order(21) + void shouldGenerateWithRawMode() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(GENERAL_PURPOSE_MODEL); + boolean raw = true; + boolean thinking = false; + OllamaResult result = + api.generate( + GENERAL_PURPOSE_MODEL, + "What is 2+2?", + raw, + thinking, + new OptionsBuilder().build(), + new OllamaGenerateStreamObserver(null, null)); + assertNotNull(result); + assertNotNull(result.getResponse()); + assertFalse(result.getResponse().isEmpty()); + } + + /** + * Tests generate with raw=true and streaming enabled. + * + *

Scenario: Calls generate with raw=true and streams the response. Usage: generate, + * raw=true, no thinking, streaming enabled. + */ + @Test + @Order(22) + void shouldGenerateWithRawModeAndStreaming() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(GENERAL_PURPOSE_MODEL); + boolean raw = true; + OllamaResult result = + api.generate( + GENERAL_PURPOSE_MODEL, + "What is the largest planet in our solar system?", + raw, + false, + new OptionsBuilder().build(), + new OllamaGenerateStreamObserver( + null, new ConsoleOutputGenerateTokenHandler())); + + assertNotNull(result); + assertNotNull(result.getResponse()); + assertFalse(result.getResponse().isEmpty()); + } + + /** + * Tests generate with raw=true and thinking enabled. + * + *

Scenario: Calls generate with raw=true and think=true combination. Usage: generate, + * raw=true, thinking enabled, no streaming. + */ + @Test + @Order(23) + void shouldGenerateWithRawModeAndThinking() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(THINKING_TOOL_MODEL); + boolean raw = + true; // if true no formatting will be applied to the prompt. You may choose to use + // the raw parameter if you are specifying a full templated prompt in your + // request to the API + boolean thinking = true; + OllamaResult result = + api.generate( + THINKING_TOOL_MODEL, + "What is a catalyst?", + raw, + thinking, + new OptionsBuilder().build(), + new OllamaGenerateStreamObserver(null, null)); + assertNotNull(result); + assertNotNull(result.getResponse()); + assertNotNull(result.getThinking()); + } + + /** + * Tests generate with all parameters enabled: raw=true, thinking=true, and streaming. + * + *

Scenario: Calls generate with all possible parameters enabled. Usage: generate, raw=true, + * thinking enabled, streaming enabled. */ + @Test + @Order(24) + void shouldGenerateWithAllParametersEnabled() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(THINKING_TOOL_MODEL); + // Setting raw=true instructs the API to keep the response raw: even if the model generates + // 'thinking' tokens, they will not be received as separate tokens and will be mixed in + // with the 'response' tokens + boolean raw = true; + OllamaResult result = + api.generate( + THINKING_TOOL_MODEL, + "Count 1 to 5. Just give me the numbers and do not give any other details or information.", + raw, + true, + new OptionsBuilder().setTemperature(0.1f).build(), + new OllamaGenerateStreamObserver( + thinkingToken -> LOG.info("THINKING: {}", thinkingToken), + responseToken -> LOG.info("RESPONSE: {}", responseToken))); + assertNotNull(result); + assertNotNull(result.getResponse()); + assertNotNull(result.getThinking()); + } + + /** + * Tests generateWithFormat with complex nested JSON schema. + * + *

Scenario: Uses a more complex JSON schema with nested objects and arrays. Usage: + * generateWithFormat with complex schema. */ + @Test + @Order(25) + void shouldGenerateWithComplexStructuredOutput() + throws OllamaBaseException, IOException, InterruptedException, URISyntaxException { + api.pullModel(TOOLS_MODEL); + + String prompt = + "Generate information about three major cities: their names, populations, and top attractions."; + + Map<String, Object> format = new HashMap<>(); + format.put("type", "object"); + Map<String, Object> properties = new HashMap<>(); + + Map<String, Object> citiesProperty = new HashMap<>(); + citiesProperty.put("type", "array"); + + Map<String, Object> cityItem = new HashMap<>(); + cityItem.put("type", "object"); + + Map<String, Object> cityProperties = new HashMap<>(); + cityProperties.put("name", Map.of("type", "string")); + cityProperties.put("population", Map.of("type", "number")); + + Map<String, Object> attractionsProperty = new HashMap<>(); + attractionsProperty.put("type", "array"); + attractionsProperty.put("items", Map.of("type", "string")); + cityProperties.put("attractions", attractionsProperty); + + cityItem.put("properties", cityProperties); + cityItem.put("required", List.of("name", "population", "attractions")); + + citiesProperty.put("items", cityItem); + properties.put("cities", citiesProperty); + + format.put("properties", properties); + format.put("required", List.of("cities")); + + OllamaResult result = api.generateWithFormat(TOOLS_MODEL, prompt, format); + + assertNotNull(result); + assertNotNull(result.getResponse()); + assertNotNull(result.getStructuredResponse()); + assertTrue(result.getStructuredResponse().containsKey("cities")); + } + + /** + * Tests chat with thinking enabled but no streaming. + * + *

Scenario: Enables thinking in chat mode without streaming. Usage: chat, thinking enabled, + * no streaming, no tools. + */ + @Test + @Order(26) + void shouldChatWithThinkingNoStream() + throws OllamaBaseException, + IOException, + URISyntaxException, + InterruptedException, + ToolInvocationException { + api.pullModel(THINKING_TOOL_MODEL); + OllamaChatRequestBuilder builder = + OllamaChatRequestBuilder.getInstance(THINKING_TOOL_MODEL); + OllamaChatRequest requestModel = + builder.withMessage( + OllamaChatMessageRole.USER, + "What is the meaning of life? Think deeply about this.") + .withThinking(true) + .build(); + + OllamaChatResult chatResult = api.chat(requestModel, null); + + assertNotNull(chatResult); + assertNotNull(chatResult.getResponseModel()); + assertNotNull(chatResult.getResponseModel().getMessage()); + assertNotNull(chatResult.getResponseModel().getMessage().getResponse()); + // Note: Thinking content might be in the message or separate field depending on + // implementation + } + + /** + * Tests chat with custom options and streaming. + * + *

Scenario: Combines custom options (temperature, top_p, etc.) with streaming. Usage: chat, + * custom options, streaming enabled, no tools, no thinking. + */ + @Test + @Order(27) + void shouldChatWithCustomOptionsAndStreaming() + throws OllamaBaseException, + IOException, + URISyntaxException, + InterruptedException, + ToolInvocationException { + api.pullModel(GENERAL_PURPOSE_MODEL); + + OllamaChatRequestBuilder builder = + OllamaChatRequestBuilder.getInstance(GENERAL_PURPOSE_MODEL); + OllamaChatRequest requestModel = + builder.withMessage( + OllamaChatMessageRole.USER, + "Tell me a creative story about a time traveler") + .withOptions( + new OptionsBuilder() + .setTemperature(0.9f) + .setTopP(0.9f) + .setTopK(40) + .build()) + .build(); + + OllamaChatResult chatResult = api.chat(requestModel, new ConsoleOutputChatTokenHandler()); + + assertNotNull(chatResult); + assertNotNull(chatResult.getResponseModel()); + assertNotNull(chatResult.getResponseModel().getMessage().getResponse()); + assertFalse(chatResult.getResponseModel().getMessage().getResponse().isEmpty()); + } + + /** + * Tests chat with tools, thinking, and streaming all enabled. + * + *

Scenario: The most complex chat scenario with all features enabled. Usage: chat, tools, + * thinking enabled, streaming enabled. + */ + @Test + @Order(28) + void shouldChatWithToolsThinkingAndStreaming() + throws OllamaBaseException, + IOException, + URISyntaxException, + InterruptedException, + ToolInvocationException { + api.pullModel(THINKING_TOOL_MODEL_2); + + api.registerTool(employeeFinderTool()); + + OllamaChatRequestBuilder builder = + OllamaChatRequestBuilder.getInstance(THINKING_TOOL_MODEL_2); + OllamaChatRequest requestModel = + builder.withMessage( + OllamaChatMessageRole.USER, + "I need to find information about employee John Smith. Think carefully about what details to retrieve.") + .withThinking(true) + .withOptions(new OptionsBuilder().setTemperature(0.1f).build()) + .build(); + requestModel.setUseTools(false); + OllamaChatResult chatResult = api.chat(requestModel, new ConsoleOutputChatTokenHandler()); + + assertNotNull(chatResult); + assertNotNull(chatResult.getResponseModel()); + // Verify that either tools were called or a response was generated + assertTrue(chatResult.getChatHistory().size() >= 2); + } + + /** + * Tests generateWithImages with multiple image URLs. + * + *

Scenario: Sends multiple image URLs to the vision model. Usage: generateWithImages, + * multiple image URLs, no streaming. */ + @Test + @Order(29) + void shouldGenerateWithMultipleImageURLs() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(VISION_MODEL); + + List<Object> imageUrls = + Arrays.asList( + "https://i.pinimg.com/736x/f9/4e/cb/f94ecba040696a3a20b484d2e15159ec.jpg", + "https://t3.ftcdn.net/jpg/02/96/63/80/360_F_296638053_0gUVA4WVBKceGsIr7LNqRWSnkusi07dq.jpg"); + + OllamaResult result = + api.generateWithImages( + VISION_MODEL, + "Compare these two images. What are the similarities and differences?", + imageUrls, + new OptionsBuilder().build(), + null, + null); + + assertNotNull(result); + assertNotNull(result.getResponse()); + assertFalse(result.getResponse().isEmpty()); + } + + /** + * Tests generateWithImages with mixed image sources (URL and file). + * + *

Scenario: Combines image URL with local file in a single request. Usage: + * generateWithImages, mixed image sources, no streaming. */ + @Test + @Order(30) + void shouldGenerateWithMixedImageSources() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(VISION_MODEL); + + File localImage = getImageFileFromClasspath("emoji-smile.jpeg"); + List<Object> images = + Arrays.asList( + "https://i.pinimg.com/736x/f9/4e/cb/f94ecba040696a3a20b484d2e15159ec.jpg", + localImage); + + OllamaResult result = + api.generateWithImages( + VISION_MODEL, + "Describe what you see in these images", + images, + new OptionsBuilder().build(), + null, + null); + + assertNotNull(result); + assertNotNull(result.getResponse()); + assertFalse(result.getResponse().isEmpty()); + } + + /** + * Tests chat with multiple images in a single message. + * + *

Scenario: Sends multiple images in one chat message. Usage: chat, vision model, multiple + * images, no tools, no thinking, no streaming. */ + @Test + @Order(31) + void shouldChatWithMultipleImages() + throws OllamaBaseException, + IOException, + URISyntaxException, + InterruptedException, + ToolInvocationException { + api.pullModel(VISION_MODEL); + + List<OllamaChatToolCalls> tools = Collections.emptyList(); + + File image1 = getImageFileFromClasspath("emoji-smile.jpeg"); + File image2 = getImageFileFromClasspath("roses.jpg"); + + OllamaChatRequestBuilder builder = OllamaChatRequestBuilder.getInstance(VISION_MODEL); + OllamaChatRequest requestModel = + builder.withMessage( + OllamaChatMessageRole.USER, + "Compare these images and tell me what you see", + tools, + Arrays.asList(image1, image2)) + .build(); + requestModel.setUseTools(false); + OllamaChatResult chatResult = api.chat(requestModel, null); + + assertNotNull(chatResult); + assertNotNull(chatResult.getResponseModel()); + assertNotNull(chatResult.getResponseModel().getMessage().getResponse()); + assertFalse(chatResult.getResponseModel().getMessage().getResponse().isEmpty()); + } + + /** + * Tests error handling when model doesn't exist. + * + *

Scenario: Attempts to use a non-existent model and verifies proper error handling. + */ + @Test + @Order(32) + void shouldHandleNonExistentModel() { + String nonExistentModel = "this-model-does-not-exist:latest"; + + assertThrows( + OllamaBaseException.class, + () -> { + api.generate( + nonExistentModel, + "Hello", + false, + false, + new OptionsBuilder().build(), + new OllamaGenerateStreamObserver(null, null)); + }); + } + + /** + * Tests chat with empty message (edge case). + * + *

Scenario: Sends an empty or whitespace-only message. Usage: chat, edge case testing. */ + @Test + @Order(33) + void shouldHandleEmptyMessage() + throws OllamaBaseException, + IOException, + URISyntaxException, + InterruptedException, + ToolInvocationException { + api.pullModel(GENERAL_PURPOSE_MODEL); + + List<OllamaChatToolCalls> tools = Collections.emptyList(); + OllamaChatRequestBuilder builder = + OllamaChatRequestBuilder.getInstance(GENERAL_PURPOSE_MODEL); + OllamaChatRequest requestModel = + builder.withMessage(OllamaChatMessageRole.USER, " ", tools) // whitespace only + .build(); + requestModel.setUseTools(false); + OllamaChatResult chatResult = api.chat(requestModel, null); + + assertNotNull(chatResult); + assertNotNull(chatResult.getResponseModel()); + // Should handle gracefully even with empty input + } + + /** + * Tests generate with very high temperature setting. + * + *

Scenario: Tests extreme parameter values for robustness. Usage: generate, extreme + * parameters, edge case testing. + */ + @Test + @Order(34) + void shouldGenerateWithExtremeParameters() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(GENERAL_PURPOSE_MODEL); + + OllamaResult result = + api.generate( + GENERAL_PURPOSE_MODEL, + "Generate a random word", + false, + false, + new OptionsBuilder() + .setTemperature(2.0f) // Very high temperature + .setTopP(1.0f) + .setTopK(1) + .build(), + new OllamaGenerateStreamObserver(null, null)); + + assertNotNull(result); + assertNotNull(result.getResponse()); + } + + /** + * Tests embeddings with single input string. + * + *

Scenario: Tests embedding generation with a single string instead of array. Usage: embed, + * single input. + */ + @Test + @Order(35) + void shouldReturnEmbeddingsForSingleInput() throws Exception { + api.pullModel(EMBEDDING_MODEL); + + OllamaEmbedRequestModel requestModel = new OllamaEmbedRequestModel(); + requestModel.setModel(EMBEDDING_MODEL); + requestModel.setInput( + Collections.singletonList("This is a single test sentence for embedding.")); + + OllamaEmbedResponseModel embeddings = api.embed(requestModel); + + assertNotNull(embeddings); + assertFalse(embeddings.getEmbeddings().isEmpty()); + assertEquals(1, embeddings.getEmbeddings().size()); + } + + /** + * Tests chat with keep-alive parameter. + * + *

Scenario: Tests the keep-alive parameter which controls model unloading. Usage: chat, + * keep-alive parameter, model lifecycle management. + */ + @Test + @Order(36) + void shouldChatWithKeepAlive() + throws OllamaBaseException, + IOException, + URISyntaxException, + InterruptedException, + ToolInvocationException { + api.pullModel(GENERAL_PURPOSE_MODEL); + + OllamaChatRequestBuilder builder = + OllamaChatRequestBuilder.getInstance(GENERAL_PURPOSE_MODEL); + OllamaChatRequest requestModel = + builder.withMessage(OllamaChatMessageRole.USER, "Hello, how are you?") + .withKeepAlive("5m") // Keep model loaded for 5 minutes + .build(); + requestModel.setUseTools(false); + OllamaChatResult chatResult = api.chat(requestModel, null); + + assertNotNull(chatResult); + assertNotNull(chatResult.getResponseModel()); + assertNotNull(chatResult.getResponseModel().getMessage().getResponse()); + } + + /** + * Tests generate with custom context window options. + * + *

Scenario: Tests generation with custom context length and other advanced options. Usage: + * generate, advanced options, context management. + */ + @Test + @Order(37) + void shouldGenerateWithAdvancedOptions() + throws OllamaBaseException, IOException, URISyntaxException, InterruptedException { + api.pullModel(GENERAL_PURPOSE_MODEL); + + OllamaResult result = + api.generate( + GENERAL_PURPOSE_MODEL, + "Write a detailed explanation of machine learning", + false, + false, + new OptionsBuilder() + .setTemperature(0.7f) + .setTopP(0.9f) + .setTopK(40) + .setNumCtx(4096) // Context window size + .setRepeatPenalty(1.1f) + .build(), + new OllamaGenerateStreamObserver(null, null)); + + assertNotNull(result); + assertNotNull(result.getResponse()); + assertFalse(result.getResponse().isEmpty()); + } + + /** + * Tests concurrent chat requests to verify thread safety. + * + *

Scenario: Sends multiple chat requests concurrently to test thread safety. Usage: chat, + * concurrency testing, thread safety. */ + @Test + @Order(38) + void shouldHandleConcurrentChatRequests() + throws InterruptedException, OllamaBaseException, IOException, URISyntaxException { + api.pullModel(GENERAL_PURPOSE_MODEL); + + int numThreads = 3; + CountDownLatch latch = new CountDownLatch(numThreads); + List<OllamaChatResult> results = Collections.synchronizedList(new ArrayList<>()); + List<Exception> exceptions = Collections.synchronizedList(new ArrayList<>()); + + for (int i = 0; i < numThreads; i++) { + final int threadId = i; + Thread thread = + new Thread( + () -> { + try { + OllamaChatRequestBuilder builder = + OllamaChatRequestBuilder.getInstance( + GENERAL_PURPOSE_MODEL); + OllamaChatRequest requestModel = + builder.withMessage( + OllamaChatMessageRole.USER, + "Hello from thread " + + threadId + + ". What is 2+2?") + .build(); + requestModel.setUseTools(false); + OllamaChatResult result = api.chat(requestModel, null); + results.add(result); + } catch (Exception e) { + exceptions.add(e); + } finally { + latch.countDown(); + } + }); + thread.start(); + } + + latch.await(60, java.util.concurrent.TimeUnit.SECONDS); + + assertTrue(exceptions.isEmpty(), "No exceptions should occur during concurrent requests"); + assertEquals(numThreads, results.size(), "All requests should complete successfully"); + + for (OllamaChatResult result : results) { + assertNotNull(result); + assertNotNull(result.getResponseModel()); + assertNotNull(result.getResponseModel().getMessage().getResponse()); + } + } + /** * Utility method to retrieve an image file from the classpath. + *

+ * * @param fileName the name of the image file * @return the File object for the image */ @@ -1026,8 +1622,8 @@ class OllamaAPIIntegrationTest { /** * Returns a ToolSpecification for an employee finder tool. - *

- * This tool can be registered with the OllamaAPI to enable tool-calling scenarios in chat. + * + *

This tool can be registered with the OllamaAPI to enable tool-calling scenarios in chat. * The tool accepts employee-name, employee-address, and employee-phone as parameters. */ private Tools.ToolSpecification employeeFinderTool() { @@ -1058,11 +1654,11 @@ class OllamaAPIIntegrationTest { "string") .description( "The name" - + " of the" - + " employee," - + " e.g." - + " John" - + " Doe") + + " of the" + + " employee," + + " e.g." + + " John" + + " Doe") .required( true) .build()) @@ -1076,16 +1672,16 @@ class OllamaAPIIntegrationTest { "string") .description( "The address" - + " of the" - + " employee," - + " Always" - + " eturns" - + " a random" - + " address." - + " For example," - + " Church" - + " St, Bengaluru," - + " India") + + " of the" + + " employee," + + " Always" + + " returns" + + " a random" + + " address." + + " For example," + + " Church" + + " St, Bengaluru," + + " India") .required( true) .build()) @@ -1099,16 +1695,16 @@ class OllamaAPIIntegrationTest { "string") .description( "The phone" - + " number" - + " of the" - + " employee." - + " Always" - + " returns" - + " a random" - + " phone" - + " number." - + " For example," - + " 9911002233") + + " number" + + " of the" + + " employee." + + " Always" + + " returns" + + " a random" + + " phone" + + " number." + + " For example," + + " 9911002233") .required( true) .build()) diff --git a/src/test/resources/test-config.properties b/src/test/resources/test-config.properties index 62f46dd..0b656cb 100644 --- a/src/test/resources/test-config.properties +++ b/src/test/resources/test-config.properties @@ -1,4 +1,5 @@ USE_EXTERNAL_OLLAMA_HOST=true OLLAMA_HOST=http://192.168.29.229:11434/ REQUEST_TIMEOUT_SECONDS=120 -NUMBER_RETRIES_FOR_MODEL_PULL=3 \ No newline at end of file +NUMBER_RETRIES_FOR_MODEL_PULL=3 +MODEL_KEEP_ALIVE_TIME=0 \ No newline at end of file