refactor: clean up and deprecate unused methods in OllamaAPI and related classes

- Removed deprecated methods and unused imports from `OllamaAPI`.
- Updated method signatures to improve clarity and consistency.
- Refactored embedding request handling to utilize `OllamaEmbedRequestModel`.
- Adjusted integration tests to reflect changes in method usage and removed obsolete tests.
- Enhanced code readability by standardizing formatting and comments across various classes.
2025-10-14 01:18:58 +02:00 · 2025-09-16 00:27:11 +05:30 · 2025-09-16 00:27:11 +05:30 · 656802b343
commit 656802b343
parent 44c6236243
31 changed files with 558 additions and 959 deletions
--- a/src/main/java/io/github/ollama4j/OllamaAPI.java
+++ b/src/main/java/io/github/ollama4j/OllamaAPI.java
@ -9,8 +9,6 @@ import io.github.ollama4j.exceptions.ToolNotFoundException;
 import io.github.ollama4j.models.chat.*;
 import io.github.ollama4j.models.embeddings.OllamaEmbedRequestModel;
 import io.github.ollama4j.models.embeddings.OllamaEmbedResponseModel;
-import io.github.ollama4j.models.embeddings.OllamaEmbeddingResponseModel;
-import io.github.ollama4j.models.embeddings.OllamaEmbeddingsRequestModel;
 import io.github.ollama4j.models.generate.OllamaGenerateRequest;
 import io.github.ollama4j.models.generate.OllamaStreamHandler;
 import io.github.ollama4j.models.generate.OllamaTokenHandler;
@ -25,10 +23,6 @@ import io.github.ollama4j.utils.Constants;
 import io.github.ollama4j.utils.Options;
 import io.github.ollama4j.utils.Utils;
 import lombok.Setter;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -233,182 +227,6 @@ public class OllamaAPI {
        }
    }

-    /**
-     * Retrieves a list of models from the Ollama library. This method fetches the
-     * available models directly from Ollama
-     * library page, including model details such as the name, pull count, popular
-     * tags, tag count, and the time when model was updated.
-     *
-     * @return A list of {@link LibraryModel} objects representing the models
-     * available in the Ollama library.
-     * @throws OllamaBaseException  If the HTTP request fails or the response is not
-     *                              successful (non-200 status code).
-     * @throws IOException          If an I/O error occurs during the HTTP request
-     *                              or response processing.
-     * @throws InterruptedException If the thread executing the request is
-     *                              interrupted.
-     * @throws URISyntaxException   If there is an error creating the URI for the
-     *                              HTTP request.
-     */
-    public List<LibraryModel> listModelsFromLibrary()
-            throws OllamaBaseException, IOException, InterruptedException, URISyntaxException {
-        String url = "https://ollama.com/library";
-        HttpClient httpClient = HttpClient.newHttpClient();
-        HttpRequest httpRequest = getRequestBuilderDefault(new URI(url))
-                .header(Constants.HttpConstants.HEADER_KEY_ACCEPT, Constants.HttpConstants.APPLICATION_JSON)
-                .header(Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, Constants.HttpConstants.APPLICATION_JSON).GET()
-                .build();
-        HttpResponse<String> response = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
-        int statusCode = response.statusCode();
-        String responseString = response.body();
-        List<LibraryModel> models = new ArrayList<>();
-        if (statusCode == 200) {
-            Document doc = Jsoup.parse(responseString);
-            Elements modelSections = doc.selectXpath("//*[@id='repo']/ul/li/a");
-            for (Element e : modelSections) {
-                LibraryModel model = new LibraryModel();
-                Elements names = e.select("div > h2 > div > span");
-                Elements desc = e.select("div > p");
-                Elements pullCounts = e.select("div:nth-of-type(2) > p > span:first-of-type > span:first-of-type");
-                Elements popularTags = e.select("div > div > span");
-                Elements totalTags = e.select("div:nth-of-type(2) > p > span:nth-of-type(2) > span:first-of-type");
-                Elements lastUpdatedTime = e
-                        .select("div:nth-of-type(2) > p > span:nth-of-type(3) > span:nth-of-type(2)");
-
-                if (names.first() == null || names.isEmpty()) {
-                    // if name cannot be extracted, skip.
-                    continue;
-                }
-                Optional.ofNullable(names.first()).map(Element::text).ifPresent(model::setName);
-                model.setDescription(Optional.ofNullable(desc.first()).map(Element::text).orElse(""));
-                model.setPopularTags(Optional.of(popularTags)
-                        .map(tags -> tags.stream().map(Element::text).collect(Collectors.toList()))
-                        .orElse(new ArrayList<>()));
-                model.setPullCount(Optional.ofNullable(pullCounts.first()).map(Element::text).orElse(""));
-                model.setTotalTags(
-                        Optional.ofNullable(totalTags.first()).map(Element::text).map(Integer::parseInt).orElse(0));
-                model.setLastUpdated(Optional.ofNullable(lastUpdatedTime.first()).map(Element::text).orElse(""));
-
-                models.add(model);
-            }
-            return models;
-        } else {
-            throw new OllamaBaseException(statusCode + " - " + responseString);
-        }
-    }
-
-    /**
-     * Fetches the tags associated with a specific model from Ollama library.
-     * This method fetches the available model tags directly from Ollama library
-     * model page, including model tag name, size and time when model was last
-     * updated
-     * into a list of {@link LibraryModelTag} objects.
-     *
-     * @param libraryModel the {@link LibraryModel} object which contains the name
-     *                     of the library model
-     *                     for which the tags need to be fetched.
-     * @return a list of {@link LibraryModelTag} objects containing the extracted
-     * tags and their associated metadata.
-     * @throws OllamaBaseException  if the HTTP response status code indicates an
-     *                              error (i.e., not 200 OK),
-     *                              or if there is any other issue during the
-     *                              request or response processing.
-     * @throws IOException          if an input/output exception occurs during the
-     *                              HTTP request or response handling.
-     * @throws InterruptedException if the thread is interrupted while waiting for
-     *                              the HTTP response.
-     * @throws URISyntaxException   if the URI format is incorrect or invalid.
-     */
-    public LibraryModelDetail getLibraryModelDetails(LibraryModel libraryModel)
-            throws OllamaBaseException, IOException, InterruptedException, URISyntaxException {
-        String url = String.format("https://ollama.com/library/%s/tags", libraryModel.getName());
-        HttpClient httpClient = HttpClient.newHttpClient();
-        HttpRequest httpRequest = getRequestBuilderDefault(new URI(url))
-                .header(Constants.HttpConstants.HEADER_KEY_ACCEPT, Constants.HttpConstants.APPLICATION_JSON)
-                .header(Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, Constants.HttpConstants.APPLICATION_JSON).GET()
-                .build();
-        HttpResponse<String> response = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
-        int statusCode = response.statusCode();
-        String responseString = response.body();
-
-        List<LibraryModelTag> libraryModelTags = new ArrayList<>();
-        if (statusCode == 200) {
-            Document doc = Jsoup.parse(responseString);
-            Elements tagSections = doc
-                    .select("html > body > main > div > section > div > div > div:nth-child(n+2) > div");
-            for (Element e : tagSections) {
-                Elements tags = e.select("div > a > div");
-                Elements tagsMetas = e.select("div > span");
-
-                LibraryModelTag libraryModelTag = new LibraryModelTag();
-
-                if (tags.first() == null || tags.isEmpty()) {
-                    // if tag cannot be extracted, skip.
-                    continue;
-                }
-                libraryModelTag.setName(libraryModel.getName());
-                Optional.ofNullable(tags.first()).map(Element::text).ifPresent(libraryModelTag::setTag);
-                libraryModelTag.setSize(Optional.ofNullable(tagsMetas.first()).map(element -> element.text().split("•"))
-                        .filter(parts -> parts.length > 1).map(parts -> parts[1].trim()).orElse(""));
-                libraryModelTag
-                        .setLastUpdated(Optional.ofNullable(tagsMetas.first()).map(element -> element.text().split("•"))
-                                .filter(parts -> parts.length > 1).map(parts -> parts[2].trim()).orElse(""));
-                libraryModelTags.add(libraryModelTag);
-            }
-            LibraryModelDetail libraryModelDetail = new LibraryModelDetail();
-            libraryModelDetail.setModel(libraryModel);
-            libraryModelDetail.setTags(libraryModelTags);
-            return libraryModelDetail;
-        } else {
-            throw new OllamaBaseException(statusCode + " - " + responseString);
-        }
-    }
-
-    /**
-     * Finds a specific model using model name and tag from Ollama library.
-     * <p>
-     * <b>Deprecated:</b> This method relies on the HTML structure of the Ollama
-     * website,
-     * which is subject to change at any time. As a result, it is difficult to keep
-     * this API
-     * method consistently updated and reliable. Therefore, this method is
-     * deprecated and
-     * may be removed in future releases.
-     * <p>
-     * This method retrieves the model from the Ollama library by its name, then
-     * fetches its tags.
-     * It searches through the tags of the model to find one that matches the
-     * specified tag name.
-     * If the model or the tag is not found, it throws a
-     * {@link NoSuchElementException}.
-     *
-     * @param modelName The name of the model to search for in the library.
-     * @param tag       The tag name to search for within the specified model.
-     * @return The {@link LibraryModelTag} associated with the specified model and
-     * tag.
-     * @throws OllamaBaseException    If there is a problem with the Ollama library
-     *                                operations.
-     * @throws IOException            If an I/O error occurs during the operation.
-     * @throws URISyntaxException     If there is an error with the URI syntax.
-     * @throws InterruptedException   If the operation is interrupted.
-     * @throws NoSuchElementException If the model or the tag is not found.
-     * @deprecated This method relies on the HTML structure of the Ollama website,
-     * which can change at any time and break this API. It is deprecated
-     * and may be removed in the future.
-     */
-    @Deprecated
-    public LibraryModelTag findModelTagFromLibrary(String modelName, String tag)
-            throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
-        List<LibraryModel> libraryModels = this.listModelsFromLibrary();
-        LibraryModel libraryModel = libraryModels.stream().filter(model -> model.getName().equals(modelName))
-                .findFirst().orElseThrow(
-                        () -> new NoSuchElementException(String.format("Model by name '%s' not found", modelName)));
-        LibraryModelDetail libraryModelDetail = this.getLibraryModelDetails(libraryModel);
-        return libraryModelDetail.getTags().stream().filter(tagName -> tagName.getTag().equals(tag)).findFirst()
-                .orElseThrow(() -> new NoSuchElementException(
-                        String.format("Tag '%s' for model '%s' not found", tag, modelName)));
-    }
-
    /**
     * Pull a model on the Ollama server from the list of <a
     * href="https://ollama.ai/library">available models</a>.
@ -584,80 +402,6 @@ public class OllamaAPI {
        }
    }

-    /**
-     * Create a custom model from a model file. Read more about custom model file
-     * creation <a
-     * href=
-     * "https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md">here</a>.
-     *
-     * @param modelName     the name of the custom model to be created.
-     * @param modelFilePath the path to model file that exists on the Ollama server.
-     * @throws OllamaBaseException  if the response indicates an error status
-     * @throws IOException          if an I/O error occurs during the HTTP request
-     * @throws InterruptedException if the operation is interrupted
-     * @throws URISyntaxException   if the URI for the request is malformed
-     */
-    @Deprecated
-    public void createModelWithFilePath(String modelName, String modelFilePath)
-            throws IOException, InterruptedException, OllamaBaseException, URISyntaxException {
-        String url = this.host + "/api/create";
-        String jsonData = new CustomModelFilePathRequest(modelName, modelFilePath).toString();
-        HttpRequest request = getRequestBuilderDefault(new URI(url))
-                .header(Constants.HttpConstants.HEADER_KEY_ACCEPT, Constants.HttpConstants.APPLICATION_JSON)
-                .header(Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, Constants.HttpConstants.APPLICATION_JSON)
-                .POST(HttpRequest.BodyPublishers.ofString(jsonData, StandardCharsets.UTF_8)).build();
-        HttpClient client = HttpClient.newHttpClient();
-        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
-        int statusCode = response.statusCode();
-        String responseString = response.body();
-        if (statusCode != 200) {
-            throw new OllamaBaseException(statusCode + " - " + responseString);
-        }
-        // FIXME: Ollama API returns HTTP status code 200 for model creation failure
-        // cases. Correct this
-        // if the issue is fixed in the Ollama API server.
-        if (responseString.contains("error")) {
-            throw new OllamaBaseException(responseString);
-        }
-        LOG.debug(responseString);
-    }
-
-    /**
-     * Create a custom model from a model file. Read more about custom model file
-     * creation <a
-     * href=
-     * "https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md">here</a>.
-     *
-     * @param modelName         the name of the custom model to be created.
-     * @param modelFileContents the path to model file that exists on the Ollama
-     *                          server.
-     * @throws OllamaBaseException  if the response indicates an error status
-     * @throws IOException          if an I/O error occurs during the HTTP request
-     * @throws InterruptedException if the operation is interrupted
-     * @throws URISyntaxException   if the URI for the request is malformed
-     */
-    @Deprecated
-    public void createModelWithModelFileContents(String modelName, String modelFileContents)
-            throws IOException, InterruptedException, OllamaBaseException, URISyntaxException {
-        String url = this.host + "/api/create";
-        String jsonData = new CustomModelFileContentsRequest(modelName, modelFileContents).toString();
-        HttpRequest request = getRequestBuilderDefault(new URI(url))
-                .header(Constants.HttpConstants.HEADER_KEY_ACCEPT, Constants.HttpConstants.APPLICATION_JSON)
-                .header(Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE, Constants.HttpConstants.APPLICATION_JSON)
-                .POST(HttpRequest.BodyPublishers.ofString(jsonData, StandardCharsets.UTF_8)).build();
-        HttpClient client = HttpClient.newHttpClient();
-        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
-        int statusCode = response.statusCode();
-        String responseString = response.body();
-        if (statusCode != 200) {
-            throw new OllamaBaseException(statusCode + " - " + responseString);
-        }
-        if (responseString.contains("error")) {
-            throw new OllamaBaseException(responseString);
-        }
-        LOG.debug(responseString);
-    }
-
    /**
     * Create a custom model. Read more about custom model creation <a
     * href=
@ -722,70 +466,6 @@ public class OllamaAPI {
        }
    }

-    /**
-     * Generate embeddings for a given text from a model
-     *
-     * @param model  name of model to generate embeddings from
-     * @param prompt text to generate embeddings for
-     * @return embeddings
-     * @throws OllamaBaseException  if the response indicates an error status
-     * @throws IOException          if an I/O error occurs during the HTTP request
-     * @throws InterruptedException if the operation is interrupted
-     * @deprecated Use {@link #embed(String, List)} instead.
-     */
-    @Deprecated
-    public List<Double> generateEmbeddings(String model, String prompt)
-            throws IOException, InterruptedException, OllamaBaseException {
-        return generateEmbeddings(new OllamaEmbeddingsRequestModel(model, prompt));
-    }
-
-    /**
-     * Generate embeddings using a {@link OllamaEmbeddingsRequestModel}.
-     *
-     * @param modelRequest request for '/api/embeddings' endpoint
-     * @return embeddings
-     * @throws OllamaBaseException  if the response indicates an error status
-     * @throws IOException          if an I/O error occurs during the HTTP request
-     * @throws InterruptedException if the operation is interrupted
-     * @deprecated Use {@link #embed(OllamaEmbedRequestModel)} instead.
-     */
-    @Deprecated
-    public List<Double> generateEmbeddings(OllamaEmbeddingsRequestModel modelRequest)
-            throws IOException, InterruptedException, OllamaBaseException {
-        URI uri = URI.create(this.host + "/api/embeddings");
-        String jsonData = modelRequest.toString();
-        HttpClient httpClient = HttpClient.newHttpClient();
-        HttpRequest.Builder requestBuilder = getRequestBuilderDefault(uri)
-                .header(Constants.HttpConstants.HEADER_KEY_ACCEPT, Constants.HttpConstants.APPLICATION_JSON)
-                .POST(HttpRequest.BodyPublishers.ofString(jsonData));
-        HttpRequest request = requestBuilder.build();
-        HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
-        int statusCode = response.statusCode();
-        String responseBody = response.body();
-        if (statusCode == 200) {
-            OllamaEmbeddingResponseModel embeddingResponse = Utils.getObjectMapper().readValue(responseBody,
-                    OllamaEmbeddingResponseModel.class);
-            return embeddingResponse.getEmbedding();
-        } else {
-            throw new OllamaBaseException(statusCode + " - " + responseBody);
-        }
-    }
-
-    /**
-     * Generate embeddings for a given text from a model
-     *
-     * @param model  name of model to generate embeddings from
-     * @param inputs text/s to generate embeddings for
-     * @return embeddings
-     * @throws OllamaBaseException  if the response indicates an error status
-     * @throws IOException          if an I/O error occurs during the HTTP request
-     * @throws InterruptedException if the operation is interrupted
-     */
-    public OllamaEmbedResponseModel embed(String model, List<String> inputs)
-            throws IOException, InterruptedException, OllamaBaseException {
-        return embed(new OllamaEmbedRequestModel(model, inputs));
-    }
-
    /**
     * Generate embeddings using a {@link OllamaEmbedRequestModel}.
     *
@ -1068,7 +748,7 @@ public class OllamaAPI {
     * </p>
     *
     * <pre>{@code
-     * OllamaAsyncResultStreamer resultStreamer = ollamaAPI.generateAsync("gpt-oss:20b", "Who are you", false, true);
+     * OllamaAsyncResultStreamer resultStreamer = ollamaAPI.generate("gpt-oss:20b", "Who are you", false, true);
     * int pollIntervalMilliseconds = 1000;
     * while (true) {
     *     String thinkingTokens = resultStreamer.getThinkingResponseStream().poll();
@ -1155,86 +835,7 @@ public class OllamaAPI {
    }

    /**
-     * Ask a question to a model based on a given message stack (i.e. a chat
-     * history). Creates a synchronous call to the api
-     * 'api/chat'.
-     *
-     * @param model    the ollama model to ask the question to
-     * @param messages chat history / message stack to send to the model
-     * @return {@link OllamaChatResult} containing the api response and the message
-     * history including the newly acquired assistant response.
-     * @throws OllamaBaseException     any response code than 200 has been returned
-     * @throws IOException             in case the responseStream can not be read
-     * @throws InterruptedException    in case the server is not reachable or
-     *                                 network
-     *                                 issues happen
-     * @throws OllamaBaseException     if the response indicates an error status
-     * @throws IOException             if an I/O error occurs during the HTTP
-     *                                 request
-     * @throws InterruptedException    if the operation is interrupted
-     * @throws ToolInvocationException if the tool invocation fails
-     */
-    public OllamaChatResult chat(String model, List<OllamaChatMessage> messages)
-            throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
-        OllamaChatRequestBuilder builder = OllamaChatRequestBuilder.getInstance(model);
-        return chat(builder.withMessages(messages).build());
-    }
-
-    /**
-     * Ask a question to a model using an {@link OllamaChatRequest}. This can be
-     * constructed using an {@link OllamaChatRequestBuilder}.
-     * <p>
-     * Hint: the OllamaChatRequestModel#getStream() property is not implemented.
-     *
-     * @param request request object to be sent to the server
-     * @return {@link OllamaChatResult}
-     * @throws OllamaBaseException     any response code than 200 has been returned
-     * @throws IOException             in case the responseStream can not be read
-     * @throws InterruptedException    in case the server is not reachable or
-     *                                 network
-     *                                 issues happen
-     * @throws OllamaBaseException     if the response indicates an error status
-     * @throws IOException             if an I/O error occurs during the HTTP
-     *                                 request
-     * @throws InterruptedException    if the operation is interrupted
-     * @throws ToolInvocationException if the tool invocation fails
-     */
-    public OllamaChatResult chat(OllamaChatRequest request)
-            throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
-        return chat(request, null, null);
-    }
-
-    /**
-     * Ask a question to a model using an {@link OllamaChatRequest}. This can be
-     * constructed using an {@link OllamaChatRequestBuilder}.
-     * <p>
-     * Hint: the OllamaChatRequestModel#getStream() property is not implemented.
-     *
-     * @param request               request object to be sent to the server
-     * @param responseStreamHandler callback handler to handle the last message from
-     *                              stream
-     * @param thinkingStreamHandler callback handler to handle the last thinking
-     *                              message from stream
-     * @return {@link OllamaChatResult}
-     * @throws OllamaBaseException     any response code than 200 has been returned
-     * @throws IOException             in case the responseStream can not be read
-     * @throws InterruptedException    in case the server is not reachable or
-     *                                 network
-     *                                 issues happen
-     * @throws OllamaBaseException     if the response indicates an error status
-     * @throws IOException             if an I/O error occurs during the HTTP
-     *                                 request
-     * @throws InterruptedException    if the operation is interrupted
-     * @throws ToolInvocationException if the tool invocation fails
-     */
-    public OllamaChatResult chat(OllamaChatRequest request, OllamaStreamHandler thinkingStreamHandler,
-                                 OllamaStreamHandler responseStreamHandler)
-            throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
-        return chatStreaming(request, new OllamaChatStreamObserver(thinkingStreamHandler, responseStreamHandler));
-    }
-
-    /**
-     * Ask a question to a model using an {@link OllamaChatRequest}. This can be
+     * Ask a question to a model using an {@link OllamaChatRequest} and set up streaming response. This can be
     * constructed using an {@link OllamaChatRequestBuilder}.
     * <p>
     * Hint: the OllamaChatRequestModel#getStream() property is not implemented.
@ -1252,7 +853,7 @@ public class OllamaAPI {
     * @throws IOException          if an I/O error occurs during the HTTP request
     * @throws InterruptedException if the operation is interrupted
     */
-    public OllamaChatResult chatStreaming(OllamaChatRequest request, OllamaTokenHandler tokenHandler)
+    public OllamaChatResult chat(OllamaChatRequest request, OllamaTokenHandler tokenHandler)
            throws OllamaBaseException, IOException, InterruptedException, ToolInvocationException {
        OllamaChatEndpointCaller requestCaller = new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds);
        OllamaChatResult result;
--- a/src/main/java/io/github/ollama4j/models/chat/OllamaChatResult.java
+++ b/src/main/java/io/github/ollama4j/models/chat/OllamaChatResult.java
@ -38,17 +38,17 @@ public class OllamaChatResult {
    }

    @Deprecated
-    public String getResponse(){
+    public String getResponse() {
        return responseModel != null ? responseModel.getMessage().getContent() : "";
    }

    @Deprecated
-    public int getHttpStatusCode(){
+    public int getHttpStatusCode() {
        return 200;
    }

    @Deprecated
-    public long getResponseTime(){
+    public long getResponseTime() {
        return responseModel != null ? responseModel.getTotalDuration() : 0L;
    }
 }
--- a/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbedRequestBuilder.java
+++ b/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbedRequestBuilder.java
@ -12,24 +12,24 @@ public class OllamaEmbedRequestBuilder {
    private final OllamaEmbedRequestModel request;

    private OllamaEmbedRequestBuilder(String model, List<String> input) {
-        this.request = new OllamaEmbedRequestModel(model,input);
+        this.request = new OllamaEmbedRequestModel(model, input);
    }

-    public static OllamaEmbedRequestBuilder getInstance(String model, String... input){
+    public static OllamaEmbedRequestBuilder getInstance(String model, String... input) {
        return new OllamaEmbedRequestBuilder(model, List.of(input));
    }

-    public OllamaEmbedRequestBuilder withOptions(Options options){
+    public OllamaEmbedRequestBuilder withOptions(Options options) {
        this.request.setOptions(options.getOptionsMap());
        return this;
    }

-    public OllamaEmbedRequestBuilder withKeepAlive(String keepAlive){
+    public OllamaEmbedRequestBuilder withKeepAlive(String keepAlive) {
        this.request.setKeepAlive(keepAlive);
        return this;
    }

-    public OllamaEmbedRequestBuilder withoutTruncate(){
+    public OllamaEmbedRequestBuilder withoutTruncate() {
        this.request.setTruncate(false);
        return this;
    }
--- a/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbeddingResponseModel.java
+++ b/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbeddingResponseModel.java
@ -7,7 +7,7 @@ import java.util.List;

@SuppressWarnings("unused")
@Data
-@Deprecated(since="1.0.90")
+@Deprecated(since = "1.0.90")
 public class OllamaEmbeddingResponseModel {
    @JsonProperty("embedding")
    private List<Double> embedding;
--- a/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbeddingsRequestBuilder.java
+++ b/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbeddingsRequestBuilder.java
@ -2,29 +2,29 @@ package io.github.ollama4j.models.embeddings;

 import io.github.ollama4j.utils.Options;

-@Deprecated(since="1.0.90")
+@Deprecated(since = "1.0.90")
 public class OllamaEmbeddingsRequestBuilder {

-    private OllamaEmbeddingsRequestBuilder(String model, String prompt){
+    private OllamaEmbeddingsRequestBuilder(String model, String prompt) {
        request = new OllamaEmbeddingsRequestModel(model, prompt);
    }

    private OllamaEmbeddingsRequestModel request;

-    public static OllamaEmbeddingsRequestBuilder getInstance(String model, String prompt){
+    public static OllamaEmbeddingsRequestBuilder getInstance(String model, String prompt) {
        return new OllamaEmbeddingsRequestBuilder(model, prompt);
    }

-    public OllamaEmbeddingsRequestModel build(){
+    public OllamaEmbeddingsRequestModel build() {
        return request;
    }

-    public OllamaEmbeddingsRequestBuilder withOptions(Options options){
+    public OllamaEmbeddingsRequestBuilder withOptions(Options options) {
        this.request.setOptions(options.getOptionsMap());
        return this;
    }

-    public OllamaEmbeddingsRequestBuilder withKeepAlive(String keepAlive){
+    public OllamaEmbeddingsRequestBuilder withKeepAlive(String keepAlive) {
        this.request.setKeepAlive(keepAlive);
        return this;
    }
--- a/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbeddingsRequestModel.java
+++ b/src/main/java/io/github/ollama4j/models/embeddings/OllamaEmbeddingsRequestModel.java
@ -14,23 +14,23 @@ import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@RequiredArgsConstructor
@NoArgsConstructor
-@Deprecated(since="1.0.90")
+@Deprecated(since = "1.0.90")
 public class OllamaEmbeddingsRequestModel {
-  @NonNull
-  private String model;
-  @NonNull
-  private String prompt;
+    @NonNull
+    private String model;
+    @NonNull
+    private String prompt;

-  protected Map<String, Object> options;
-  @JsonProperty(value = "keep_alive")
-  private String keepAlive;
+    protected Map<String, Object> options;
+    @JsonProperty(value = "keep_alive")
+    private String keepAlive;

-  @Override
-  public String toString() {
-    try {
-      return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    @Override
+    public String toString() {
+        try {
+            return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateRequest.java
+++ b/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateRequest.java
@ -7,7 +7,6 @@ import lombok.Getter;
 import lombok.Setter;

 import java.util.List;
-import java.util.Map;

@Getter
@Setter
--- a/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateRequestBuilder.java
+++ b/src/main/java/io/github/ollama4j/models/generate/OllamaGenerateRequestBuilder.java
@ -8,46 +8,46 @@ import io.github.ollama4j.utils.Options;
 */
 public class OllamaGenerateRequestBuilder {

-    private OllamaGenerateRequestBuilder(String model, String prompt){
+    private OllamaGenerateRequestBuilder(String model, String prompt) {
        request = new OllamaGenerateRequest(model, prompt);
    }

    private OllamaGenerateRequest request;

-    public static OllamaGenerateRequestBuilder getInstance(String model){
-        return new OllamaGenerateRequestBuilder(model,"");
+    public static OllamaGenerateRequestBuilder getInstance(String model) {
+        return new OllamaGenerateRequestBuilder(model, "");
    }

-    public OllamaGenerateRequest build(){
+    public OllamaGenerateRequest build() {
        return request;
    }

-    public OllamaGenerateRequestBuilder withPrompt(String prompt){
+    public OllamaGenerateRequestBuilder withPrompt(String prompt) {
        request.setPrompt(prompt);
        return this;
    }

-    public OllamaGenerateRequestBuilder withGetJsonResponse(){
+    public OllamaGenerateRequestBuilder withGetJsonResponse() {
        this.request.setFormat("json");
        return this;
    }

-    public OllamaGenerateRequestBuilder withOptions(Options options){
+    public OllamaGenerateRequestBuilder withOptions(Options options) {
        this.request.setOptions(options.getOptionsMap());
        return this;
    }

-    public OllamaGenerateRequestBuilder withTemplate(String template){
+    public OllamaGenerateRequestBuilder withTemplate(String template) {
        this.request.setTemplate(template);
        return this;
    }

-    public OllamaGenerateRequestBuilder withStreaming(){
+    public OllamaGenerateRequestBuilder withStreaming() {
        this.request.setStream(true);
        return this;
    }

-    public OllamaGenerateRequestBuilder withKeepAlive(String keepAlive){
+    public OllamaGenerateRequestBuilder withKeepAlive(String keepAlive) {
        this.request.setKeepAlive(keepAlive);
        return this;
    }
--- a/src/main/java/io/github/ollama4j/models/request/Auth.java
+++ b/src/main/java/io/github/ollama4j/models/request/Auth.java
@ -1,10 +1,10 @@
 package io.github.ollama4j.models.request;

 public abstract class Auth {
-  /**
-   * Get authentication header value.
-   *
-   * @return authentication header value
-   */
-  public abstract String getAuthHeaderValue();
+    /**
+     * Get authentication header value.
+     *
+     * @return authentication header value
+     */
+    public abstract String getAuthHeaderValue();
 }
--- a/src/main/java/io/github/ollama4j/models/request/BasicAuth.java
+++ b/src/main/java/io/github/ollama4j/models/request/BasicAuth.java
@ -10,16 +10,16 @@ import java.util.Base64;
@AllArgsConstructor
@EqualsAndHashCode(callSuper = false)
 public class BasicAuth extends Auth {
-  private String username;
-  private String password;
+    private String username;
+    private String password;

-  /**
-   * Get basic authentication header value.
-   *
-   * @return basic authentication header value (encoded credentials)
-   */
-  public String getAuthHeaderValue() {
-      final String credentialsToEncode = this.getUsername() + ":" + this.getPassword();
-      return "Basic " + Base64.getEncoder().encodeToString(credentialsToEncode.getBytes());
-  }
+    /**
+     * Get basic authentication header value.
+     *
+     * @return basic authentication header value (encoded credentials)
+     */
+    public String getAuthHeaderValue() {
+        final String credentialsToEncode = this.getUsername() + ":" + this.getPassword();
+        return "Basic " + Base64.getEncoder().encodeToString(credentialsToEncode.getBytes());
+    }
 }
--- a/src/main/java/io/github/ollama4j/models/request/CustomModelFileContentsRequest.java
+++ b/src/main/java/io/github/ollama4j/models/request/CustomModelFileContentsRequest.java
@ -9,15 +9,15 @@ import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@AllArgsConstructor
 public class CustomModelFileContentsRequest {
-  private String name;
-  private String modelfile;
+    private String name;
+    private String modelfile;

-  @Override
-  public String toString() {
-    try {
-      return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    @Override
+    public String toString() {
+        try {
+            return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/request/CustomModelFilePathRequest.java
+++ b/src/main/java/io/github/ollama4j/models/request/CustomModelFilePathRequest.java
@ -9,15 +9,15 @@ import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@AllArgsConstructor
 public class CustomModelFilePathRequest {
-  private String name;
-  private String path;
+    private String name;
+    private String path;

-  @Override
-  public String toString() {
-    try {
-      return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    @Override
+    public String toString() {
+        try {
+            return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/request/ModelRequest.java
+++ b/src/main/java/io/github/ollama4j/models/request/ModelRequest.java
@ -9,14 +9,14 @@ import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@AllArgsConstructor
 public class ModelRequest {
-  private String name;
+    private String name;

-  @Override
-  public String toString() {
-    try {
-      return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    @Override
+    public String toString() {
+        try {
+            return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/request/OllamaChatEndpointCaller.java
+++ b/src/main/java/io/github/ollama4j/models/request/OllamaChatEndpointCaller.java
@ -61,8 +61,7 @@ public class OllamaChatEndpointCaller extends OllamaEndpointCaller {
            if (message != null) {
                if (message.getThinking() != null) {
                    thinkingBuffer.append(message.getThinking());
-                }
-                else {
+                } else {
                    responseBuffer.append(message.getContent());
                }
                if (tokenHandler != null) {
--- a/src/main/java/io/github/ollama4j/models/request/OllamaCommonRequest.java
+++ b/src/main/java/io/github/ollama4j/models/request/OllamaCommonRequest.java
@ -3,8 +3,6 @@ package io.github.ollama4j.models.request;
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize;
-import io.github.ollama4j.utils.BooleanToJsonFormatFlagSerializer;
 import io.github.ollama4j.utils.Utils;
 import lombok.Data;

@ -14,23 +12,23 @@ import java.util.Map;
@JsonInclude(JsonInclude.Include.NON_NULL)
 public abstract class OllamaCommonRequest {

-  protected String model;
-//  @JsonSerialize(using = BooleanToJsonFormatFlagSerializer.class)
+    protected String model;
+    //  @JsonSerialize(using = BooleanToJsonFormatFlagSerializer.class)
 //  this can either be set to format=json or format={"key1": "val1", "key2": "val2"}
-  @JsonProperty(value = "format", required = false, defaultValue = "json")
-  protected Object format;
-  protected Map<String, Object> options;
-  protected String template;
-  protected boolean stream;
-  @JsonProperty(value = "keep_alive")
-  protected String keepAlive;
+    @JsonProperty(value = "format", required = false, defaultValue = "json")
+    protected Object format;
+    protected Map<String, Object> options;
+    protected String template;
+    protected boolean stream;
+    @JsonProperty(value = "keep_alive")
+    protected String keepAlive;


-  public String toString() {
-    try {
-      return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    public String toString() {
+        try {
+            return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/response/Model.java
+++ b/src/main/java/io/github/ollama4j/models/response/Model.java
@ -12,43 +12,43 @@ import java.time.OffsetDateTime;
@JsonIgnoreProperties(ignoreUnknown = true)
 public class Model {

-  private String name;
-  private String model;
-  @JsonProperty("modified_at")
-  private OffsetDateTime modifiedAt;
-  @JsonProperty("expires_at")
-  private OffsetDateTime expiresAt;
-  private String digest;
-  private long size;
-  @JsonProperty("details")
-  private ModelMeta modelMeta;
+    private String name;
+    private String model;
+    @JsonProperty("modified_at")
+    private OffsetDateTime modifiedAt;
+    @JsonProperty("expires_at")
+    private OffsetDateTime expiresAt;
+    private String digest;
+    private long size;
+    @JsonProperty("details")
+    private ModelMeta modelMeta;


-  /**
-   * Returns the model name without its version
-   *
-   * @return model name
-   */
-  public String getModelName() {
-    return name.split(":")[0];
-  }
+    /**
+     * Returns the model name without its version
+     *
+     * @return model name
+     */
+    public String getModelName() {
+        return name.split(":")[0];
+    }

-  /**
-   * Returns the model version without its name
-   *
-   * @return model version
-   */
-  public String getModelVersion() {
-    return name.split(":")[1];
-  }
+    /**
+     * Returns the model version without its name
+     *
+     * @return model version
+     */
+    public String getModelVersion() {
+        return name.split(":")[1];
+    }

    @Override
-  public String toString() {
-    try {
-      return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    public String toString() {
+        try {
+            return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }

 }
--- a/src/main/java/io/github/ollama4j/models/response/ModelDetail.java
+++ b/src/main/java/io/github/ollama4j/models/response/ModelDetail.java
@ -9,22 +9,22 @@ import lombok.Data;
@Data
@JsonIgnoreProperties(ignoreUnknown = true)
 public class ModelDetail {
-  private String license;
+    private String license;

-  @JsonProperty("modelfile")
-  private String modelFile;
+    @JsonProperty("modelfile")
+    private String modelFile;

-  private String parameters;
-  private String template;
-  private String system;
-  private ModelMeta details;
+    private String parameters;
+    private String template;
+    private String system;
+    private ModelMeta details;

    @Override
-  public String toString() {
-    try {
-      return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    public String toString() {
+        try {
+            return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/response/ModelMeta.java
+++ b/src/main/java/io/github/ollama4j/models/response/ModelMeta.java
@ -9,27 +9,27 @@ import lombok.Data;
@Data
@JsonIgnoreProperties(ignoreUnknown = true)
 public class ModelMeta {
-  @JsonProperty("format")
-  private String format;
+    @JsonProperty("format")
+    private String format;

-  @JsonProperty("family")
-  private String family;
+    @JsonProperty("family")
+    private String family;

-  @JsonProperty("families")
-  private String[] families;
+    @JsonProperty("families")
+    private String[] families;

-  @JsonProperty("parameter_size")
-  private String parameterSize;
+    @JsonProperty("parameter_size")
+    private String parameterSize;

-  @JsonProperty("quantization_level")
-  private String quantizationLevel;
+    @JsonProperty("quantization_level")
+    private String quantizationLevel;

    @Override
-  public String toString() {
-    try {
-      return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    public String toString() {
+        try {
+            return Utils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/models/response/OllamaErrorResponse.java
+++ b/src/main/java/io/github/ollama4j/models/response/OllamaErrorResponse.java
@ -7,5 +7,5 @@ import lombok.Data;
@JsonIgnoreProperties(ignoreUnknown = true)
 public class OllamaErrorResponse {

-  private String error;
+    private String error;
 }
--- a/src/main/java/io/github/ollama4j/models/response/OllamaStructuredResult.java
+++ b/src/main/java/io/github/ollama4j/models/response/OllamaStructuredResult.java
@ -19,67 +19,67 @@ import static io.github.ollama4j.utils.Utils.getObjectMapper;
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
 public class OllamaStructuredResult {
-  private String response;
-  private String thinking;
-  private int httpStatusCode;
-  private long responseTime = 0;
-  private String model;
+    private String response;
+    private String thinking;
+    private int httpStatusCode;
+    private long responseTime = 0;
+    private String model;

-  private @JsonProperty("created_at") String createdAt;
-  private boolean done;
-  private @JsonProperty("done_reason") String doneReason;
-  private List<Integer> context;
-  private @JsonProperty("total_duration") Long totalDuration;
-  private @JsonProperty("load_duration") Long loadDuration;
-  private @JsonProperty("prompt_eval_count") Integer promptEvalCount;
-  private @JsonProperty("prompt_eval_duration") Long promptEvalDuration;
-  private @JsonProperty("eval_count") Integer evalCount;
-  private @JsonProperty("eval_duration") Long evalDuration;
+    private @JsonProperty("created_at") String createdAt;
+    private boolean done;
+    private @JsonProperty("done_reason") String doneReason;
+    private List<Integer> context;
+    private @JsonProperty("total_duration") Long totalDuration;
+    private @JsonProperty("load_duration") Long loadDuration;
+    private @JsonProperty("prompt_eval_count") Integer promptEvalCount;
+    private @JsonProperty("prompt_eval_duration") Long promptEvalDuration;
+    private @JsonProperty("eval_count") Integer evalCount;
+    private @JsonProperty("eval_duration") Long evalDuration;

-  public OllamaStructuredResult(String response, long responseTime, int httpStatusCode) {
-    this.response = response;
-    this.responseTime = responseTime;
-    this.httpStatusCode = httpStatusCode;
-  }
-
-  @Override
-  public String toString() {
-    try {
-      return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    public OllamaStructuredResult(String response, long responseTime, int httpStatusCode) {
+        this.response = response;
+        this.responseTime = responseTime;
+        this.httpStatusCode = httpStatusCode;
    }
-  }

-  /**
-   * Get the structured response if the response is a JSON object.
-   *
-   * @return Map - structured response
-   */
-  public Map<String, Object> getStructuredResponse() {
-    try {
-      Map<String, Object> response = getObjectMapper().readValue(this.getResponse(),
-          new TypeReference<Map<String, Object>>() {
-          });
-      return response;
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    @Override
+    public String toString() {
+        try {
+            return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }

-  /**
-   * Get the structured response mapped to a specific class type.
-   *
-   * @param <T> The type of class to map the response to
-   * @param clazz The class to map the response to
-   * @return An instance of the specified class with the response data
-   * @throws RuntimeException if there is an error mapping the response
-   */
-  public <T> T getStructuredResponse(Class<T> clazz) {
-    try {
-      return getObjectMapper().readValue(this.getResponse(), clazz);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    /**
+     * Get the structured response if the response is a JSON object.
+     *
+     * @return Map - structured response
+     */
+    public Map<String, Object> getStructuredResponse() {
+        try {
+            Map<String, Object> response = getObjectMapper().readValue(this.getResponse(),
+                    new TypeReference<Map<String, Object>>() {
+                    });
+            return response;
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Get the structured response mapped to a specific class type.
+     *
+     * @param <T>   The type of class to map the response to
+     * @param clazz The class to map the response to
+     * @return An instance of the specified class with the response data
+     * @throws RuntimeException if there is an error mapping the response
+     */
+    public <T> T getStructuredResponse(Class<T> clazz) {
+        try {
+            return getObjectMapper().readValue(this.getResponse(), clazz);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
    }
-  }
 }
--- a/src/main/java/io/github/ollama4j/tools/OllamaToolCallsFunction.java
+++ b/src/main/java/io/github/ollama4j/tools/OllamaToolCallsFunction.java
@ -11,8 +11,7 @@ import java.util.Map;
@NoArgsConstructor
@AllArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
-public class OllamaToolCallsFunction
-{
+public class OllamaToolCallsFunction {
    private String name;
-    private Map<String,Object> arguments;
+    private Map<String, Object> arguments;
 }
--- a/src/main/java/io/github/ollama4j/tools/ReflectionalToolFunction.java
+++ b/src/main/java/io/github/ollama4j/tools/ReflectionalToolFunction.java
@ -15,17 +15,17 @@ import java.util.Map;
@Setter
@Getter
@AllArgsConstructor
-public class ReflectionalToolFunction implements ToolFunction{
+public class ReflectionalToolFunction implements ToolFunction {

    private Object functionHolder;
    private Method function;
-    private LinkedHashMap<String,String> propertyDefinition;
+    private LinkedHashMap<String, String> propertyDefinition;

    @Override
    public Object apply(Map<String, Object> arguments) {
        LinkedHashMap<String, Object> argumentsCopy = new LinkedHashMap<>(this.propertyDefinition);
-        for (Map.Entry<String,String> param : this.propertyDefinition.entrySet()){
-            argumentsCopy.replace(param.getKey(),typeCast(arguments.get(param.getKey()),param.getValue()));
+        for (Map.Entry<String, String> param : this.propertyDefinition.entrySet()) {
+            argumentsCopy.replace(param.getKey(), typeCast(arguments.get(param.getKey()), param.getValue()));
        }
        try {
            return function.invoke(functionHolder, argumentsCopy.values().toArray());
@ -35,7 +35,7 @@ public class ReflectionalToolFunction implements ToolFunction{
    }

    private Object typeCast(Object inputValue, String className) {
-        if(className == null || inputValue == null) {
+        if (className == null || inputValue == null) {
            return null;
        }
        String inputValueString = inputValue.toString();
--- a/src/main/java/io/github/ollama4j/utils/OllamaRequestBody.java
+++ b/src/main/java/io/github/ollama4j/utils/OllamaRequestBody.java
@ -17,12 +17,12 @@ public interface OllamaRequestBody {
     * @return JSON representation of a OllamaRequest
     */
    @JsonIgnore
-    default BodyPublisher getBodyPublisher(){
-                try {
-          return BodyPublishers.ofString(
-                      Utils.getObjectMapper().writeValueAsString(this));
+    default BodyPublisher getBodyPublisher() {
+        try {
+            return BodyPublishers.ofString(
+                    Utils.getObjectMapper().writeValueAsString(this));
        } catch (JsonProcessingException e) {
-          throw new IllegalArgumentException("Request not Body convertible.",e);
+            throw new IllegalArgumentException("Request not Body convertible.", e);
        }
    }
 }
--- a/src/main/java/io/github/ollama4j/utils/Options.java
+++ b/src/main/java/io/github/ollama4j/utils/Options.java
@ -4,9 +4,11 @@ import lombok.Data;

 import java.util.Map;

-/** Class for options for Ollama model. */
+/**
+ * Class for options for Ollama model.
+ */
@Data
 public class Options {

-  private final Map<String, Object> optionsMap;
+    private final Map<String, Object> optionsMap;
 }
--- a/src/main/java/io/github/ollama4j/utils/OptionsBuilder.java
+++ b/src/main/java/io/github/ollama4j/utils/OptionsBuilder.java
@ -2,247 +2,251 @@ package io.github.ollama4j.utils;

 import java.util.HashMap;

-/** Builder class for creating options for Ollama model. */
+/**
+ * Builder class for creating options for Ollama model.
+ */
 public class OptionsBuilder {

-  private final Options options;
+    private final Options options;

-  /** Constructs a new OptionsBuilder with an empty options map. */
-  public OptionsBuilder() {
-    this.options = new Options(new HashMap<>());
-  }
-
-  /**
-   * Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2
-   * = Mirostat 2.0)
-   *
-   * @param value The value for the "mirostat" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setMirostat(int value) {
-    options.getOptionsMap().put("mirostat", value);
-    return this;
-  }
-
-  /**
-   * Influences how quickly the algorithm responds to feedback from the generated text. A lower
-   * learning rate will result in slower adjustments, while a higher learning rate will make the
-   * algorithm more responsive. (Default: 0.1)
-   *
-   * @param value The value for the "mirostat_eta" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setMirostatEta(float value) {
-    options.getOptionsMap().put("mirostat_eta", value);
-    return this;
-  }
-
-  /**
-   * Controls the balance between coherence and diversity of the output. A lower value will result
-   * in more focused and coherent text. (Default: 5.0)
-   *
-   * @param value The value for the "mirostat_tau" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setMirostatTau(float value) {
-    options.getOptionsMap().put("mirostat_tau", value);
-    return this;
-  }
-
-  /**
-   * Sets the size of the context window used to generate the next token. (Default: 2048)
-   *
-   * @param value The value for the "num_ctx" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setNumCtx(int value) {
-    options.getOptionsMap().put("num_ctx", value);
-    return this;
-  }
-
-  /**
-   * The number of GQA groups in the transformer layer. Required for some models, for example, it is
-   * 8 for llama2:70b.
-   *
-   * @param value The value for the "num_gqa" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setNumGqa(int value) {
-    options.getOptionsMap().put("num_gqa", value);
-    return this;
-  }
-
-  /**
-   * The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support,
-   * 0 to disable.
-   *
-   * @param value The value for the "num_gpu" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setNumGpu(int value) {
-    options.getOptionsMap().put("num_gpu", value);
-    return this;
-  }
-
-  /**
-   * Sets the number of threads to use during computation. By default, Ollama will detect this for
-   * optimal performance. It is recommended to set this value to the number of physical CPU cores
-   * your system has (as opposed to the logical number of cores).
-   *
-   * @param value The value for the "num_thread" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setNumThread(int value) {
-    options.getOptionsMap().put("num_thread", value);
-    return this;
-  }
-
-  /**
-   * Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled,
-   * -1 = num_ctx)
-   *
-   * @param value The value for the "repeat_last_n" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setRepeatLastN(int value) {
-    options.getOptionsMap().put("repeat_last_n", value);
-    return this;
-  }
-
-  /**
-   * Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions
-   * more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
-   *
-   * @param value The value for the "repeat_penalty" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setRepeatPenalty(float value) {
-    options.getOptionsMap().put("repeat_penalty", value);
-    return this;
-  }
-
-  /**
-   * The temperature of the model. Increasing the temperature will make the model answer more
-   * creatively. (Default: 0.8)
-   *
-   * @param value The value for the "temperature" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setTemperature(float value) {
-    options.getOptionsMap().put("temperature", value);
-    return this;
-  }
-
-  /**
-   * Sets the random number seed to use for generation. Setting this to a specific number will make
-   * the model generate the same text for the same prompt. (Default: 0)
-   *
-   * @param value The value for the "seed" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setSeed(int value) {
-    options.getOptionsMap().put("seed", value);
-    return this;
-  }
-
-  /**
-   * Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating
-   * text and return. Multiple stop patterns may be set by specifying multiple separate `stop`
-   * parameters in a modelfile.
-   *
-   * @param value The value for the "stop" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setStop(String value) {
-    options.getOptionsMap().put("stop", value);
-    return this;
-  }
-
-  /**
-   * Tail free sampling is used to reduce the impact of less probable tokens from the output. A
-   * higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this
-   * setting. (default: 1)
-   *
-   * @param value The value for the "tfs_z" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setTfsZ(float value) {
-    options.getOptionsMap().put("tfs_z", value);
-    return this;
-  }
-
-  /**
-   * Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite
-   * generation, -2 = fill context)
-   *
-   * @param value The value for the "num_predict" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setNumPredict(int value) {
-    options.getOptionsMap().put("num_predict", value);
-    return this;
-  }
-
-  /**
-   * Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more
-   * diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
-   *
-   * @param value The value for the "top_k" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setTopK(int value) {
-    options.getOptionsMap().put("top_k", value);
-    return this;
-  }
-
-  /**
-   * Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a
-   * lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
-   *
-   * @param value The value for the "top_p" parameter.
-   * @return The updated OptionsBuilder.
-   */
-  public OptionsBuilder setTopP(float value) {
-    options.getOptionsMap().put("top_p", value);
-    return this;
-  }
-
-  /**
-   * Alternative to the top_p, and aims to ensure a balance of qualityand variety. The parameter p
-   * represents the minimum probability for a token to be considered, relative to the probability
-   * of the most likely token. For example, with p=0.05 and the most likely token having a
-   * probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0)
-   */
-  public OptionsBuilder setMinP(float value) {
-    options.getOptionsMap().put("min_p", value);
-    return this;
-  }
-
-  /**
-   * Allows passing an option not formally supported by the library
-   * @param name The option name for the parameter.
-   * @param value The value for the "{name}" parameter.
-   * @return The updated OptionsBuilder.
-   * @throws IllegalArgumentException if parameter has an unsupported type
-   */
-  public OptionsBuilder setCustomOption(String name, Object value) throws IllegalArgumentException {
-    if (!(value instanceof Integer || value instanceof Float || value instanceof String)) {
-      throw new IllegalArgumentException("Invalid type for parameter. Allowed types are: Integer, Float, or String.");
+    /**
+     * Constructs a new OptionsBuilder with an empty options map.
+     */
+    public OptionsBuilder() {
+        this.options = new Options(new HashMap<>());
+    }
+
+    /**
+     * Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2
+     * = Mirostat 2.0)
+     *
+     * @param value The value for the "mirostat" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setMirostat(int value) {
+        options.getOptionsMap().put("mirostat", value);
+        return this;
+    }
+
+    /**
+     * Influences how quickly the algorithm responds to feedback from the generated text. A lower
+     * learning rate will result in slower adjustments, while a higher learning rate will make the
+     * algorithm more responsive. (Default: 0.1)
+     *
+     * @param value The value for the "mirostat_eta" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setMirostatEta(float value) {
+        options.getOptionsMap().put("mirostat_eta", value);
+        return this;
+    }
+
+    /**
+     * Controls the balance between coherence and diversity of the output. A lower value will result
+     * in more focused and coherent text. (Default: 5.0)
+     *
+     * @param value The value for the "mirostat_tau" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setMirostatTau(float value) {
+        options.getOptionsMap().put("mirostat_tau", value);
+        return this;
+    }
+
+    /**
+     * Sets the size of the context window used to generate the next token. (Default: 2048)
+     *
+     * @param value The value for the "num_ctx" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setNumCtx(int value) {
+        options.getOptionsMap().put("num_ctx", value);
+        return this;
+    }
+
+    /**
+     * The number of GQA groups in the transformer layer. Required for some models, for example, it is
+     * 8 for llama2:70b.
+     *
+     * @param value The value for the "num_gqa" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setNumGqa(int value) {
+        options.getOptionsMap().put("num_gqa", value);
+        return this;
+    }
+
+    /**
+     * The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support,
+     * 0 to disable.
+     *
+     * @param value The value for the "num_gpu" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setNumGpu(int value) {
+        options.getOptionsMap().put("num_gpu", value);
+        return this;
+    }
+
+    /**
+     * Sets the number of threads to use during computation. By default, Ollama will detect this for
+     * optimal performance. It is recommended to set this value to the number of physical CPU cores
+     * your system has (as opposed to the logical number of cores).
+     *
+     * @param value The value for the "num_thread" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setNumThread(int value) {
+        options.getOptionsMap().put("num_thread", value);
+        return this;
+    }
+
+    /**
+     * Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled,
+     * -1 = num_ctx)
+     *
+     * @param value The value for the "repeat_last_n" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setRepeatLastN(int value) {
+        options.getOptionsMap().put("repeat_last_n", value);
+        return this;
+    }
+
+    /**
+     * Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions
+     * more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
+     *
+     * @param value The value for the "repeat_penalty" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setRepeatPenalty(float value) {
+        options.getOptionsMap().put("repeat_penalty", value);
+        return this;
+    }
+
+    /**
+     * The temperature of the model. Increasing the temperature will make the model answer more
+     * creatively. (Default: 0.8)
+     *
+     * @param value The value for the "temperature" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setTemperature(float value) {
+        options.getOptionsMap().put("temperature", value);
+        return this;
+    }
+
+    /**
+     * Sets the random number seed to use for generation. Setting this to a specific number will make
+     * the model generate the same text for the same prompt. (Default: 0)
+     *
+     * @param value The value for the "seed" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setSeed(int value) {
+        options.getOptionsMap().put("seed", value);
+        return this;
+    }
+
+    /**
+     * Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating
+     * text and return. Multiple stop patterns may be set by specifying multiple separate `stop`
+     * parameters in a modelfile.
+     *
+     * @param value The value for the "stop" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setStop(String value) {
+        options.getOptionsMap().put("stop", value);
+        return this;
+    }
+
+    /**
+     * Tail free sampling is used to reduce the impact of less probable tokens from the output. A
+     * higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this
+     * setting. (default: 1)
+     *
+     * @param value The value for the "tfs_z" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setTfsZ(float value) {
+        options.getOptionsMap().put("tfs_z", value);
+        return this;
+    }
+
+    /**
+     * Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite
+     * generation, -2 = fill context)
+     *
+     * @param value The value for the "num_predict" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setNumPredict(int value) {
+        options.getOptionsMap().put("num_predict", value);
+        return this;
+    }
+
+    /**
+     * Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more
+     * diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
+     *
+     * @param value The value for the "top_k" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setTopK(int value) {
+        options.getOptionsMap().put("top_k", value);
+        return this;
+    }
+
+    /**
+     * Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a
+     * lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
+     *
+     * @param value The value for the "top_p" parameter.
+     * @return The updated OptionsBuilder.
+     */
+    public OptionsBuilder setTopP(float value) {
+        options.getOptionsMap().put("top_p", value);
+        return this;
+    }
+
+    /**
+     * Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter p
+     * represents the minimum probability for a token to be considered, relative to the probability
+     * of the most likely token. For example, with p=0.05 and the most likely token having a
+     * probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0)
+     */
+    public OptionsBuilder setMinP(float value) {
+        options.getOptionsMap().put("min_p", value);
+        return this;
+    }
+
+    /**
+     * Allows passing an option not formally supported by the library
+     *
+     * @param name  The option name for the parameter.
+     * @param value The value for the "{name}" parameter.
+     * @return The updated OptionsBuilder.
+     * @throws IllegalArgumentException if parameter has an unsupported type
+     */
+    public OptionsBuilder setCustomOption(String name, Object value) throws IllegalArgumentException {
+        if (!(value instanceof Integer || value instanceof Float || value instanceof String)) {
+            throw new IllegalArgumentException("Invalid type for parameter. Allowed types are: Integer, Float, or String.");
+        }
+        options.getOptionsMap().put(name, value);
+        return this;
    }
-    options.getOptionsMap().put(name, value);
-    return this;
-  }


-
-  /**
-   * Builds the options map.
-   *
-   * @return The populated options map.
-   */
-  public Options build() {
-    return options;
-  }
+    /**
+     * Builds the options map.
+     *
+     * @return The populated options map.
+     */
+    public Options build() {
+        return options;
+    }


 }
--- a/src/main/java/io/github/ollama4j/utils/PromptBuilder.java
+++ b/src/main/java/io/github/ollama4j/utils/PromptBuilder.java
@ -18,52 +18,54 @@ package io.github.ollama4j.utils;
 */
 public class PromptBuilder {

-  private final StringBuilder prompt;
+    private final StringBuilder prompt;

-  /** Constructs a new {@code PromptBuilder} with an empty prompt. */
-  public PromptBuilder() {
-    this.prompt = new StringBuilder();
-  }
+    /**
+     * Constructs a new {@code PromptBuilder} with an empty prompt.
+     */
+    public PromptBuilder() {
+        this.prompt = new StringBuilder();
+    }

-  /**
-   * Appends the specified text to the prompt.
-   *
-   * @param text the text to be added to the prompt
-   * @return a reference to this {@code PromptBuilder} instance for method chaining
-   */
-  public PromptBuilder add(String text) {
-    prompt.append(text);
-    return this;
-  }
+    /**
+     * Appends the specified text to the prompt.
+     *
+     * @param text the text to be added to the prompt
+     * @return a reference to this {@code PromptBuilder} instance for method chaining
+     */
+    public PromptBuilder add(String text) {
+        prompt.append(text);
+        return this;
+    }

-  /**
-   * Appends the specified text followed by a newline character to the prompt.
-   *
-   * @param text the text to be added as a line to the prompt
-   * @return a reference to this {@code PromptBuilder} instance for method chaining
-   */
-  public PromptBuilder addLine(String text) {
-    prompt.append(text).append("\n");
-    return this;
-  }
+    /**
+     * Appends the specified text followed by a newline character to the prompt.
+     *
+     * @param text the text to be added as a line to the prompt
+     * @return a reference to this {@code PromptBuilder} instance for method chaining
+     */
+    public PromptBuilder addLine(String text) {
+        prompt.append(text).append("\n");
+        return this;
+    }

-  /**
-   * Appends a separator line to the prompt. The separator is a newline followed by a line of
-   * dashes.
-   *
-   * @return a reference to this {@code PromptBuilder} instance for method chaining
-   */
-  public PromptBuilder addSeparator() {
-    prompt.append("\n--------------------------------------------------\n");
-    return this;
-  }
+    /**
+     * Appends a separator line to the prompt. The separator is a newline followed by a line of
+     * dashes.
+     *
+     * @return a reference to this {@code PromptBuilder} instance for method chaining
+     */
+    public PromptBuilder addSeparator() {
+        prompt.append("\n--------------------------------------------------\n");
+        return this;
+    }

-  /**
-   * Builds and returns the final prompt as a string.
-   *
-   * @return the final prompt as a string
-   */
-  public String build() {
-    return prompt.toString();
-  }
+    /**
+     * Builds and returns the final prompt as a string.
+     *
+     * @return the final prompt as a string
+     */
+    public String build() {
+        return prompt.toString();
+    }
 }
--- a/src/test/java/io/github/ollama4j/integrationtests/OllamaAPIIntegrationTest.java
+++ b/src/test/java/io/github/ollama4j/integrationtests/OllamaAPIIntegrationTest.java
@ -4,8 +4,8 @@ import io.github.ollama4j.OllamaAPI;
 import io.github.ollama4j.exceptions.OllamaBaseException;
 import io.github.ollama4j.exceptions.ToolInvocationException;
 import io.github.ollama4j.models.chat.*;
+import io.github.ollama4j.models.embeddings.OllamaEmbedRequestModel;
 import io.github.ollama4j.models.embeddings.OllamaEmbedResponseModel;
-import io.github.ollama4j.models.response.LibraryModel;
 import io.github.ollama4j.models.response.Model;
 import io.github.ollama4j.models.response.ModelDetail;
 import io.github.ollama4j.models.response.OllamaResult;
@ -113,15 +113,6 @@ class OllamaAPIIntegrationTest {
        assertTrue(models.size() >= 0, "Models list should not be empty");
    }

-    @Test
-    @Order(2)
-    void testListModelsFromLibrary()
-            throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
-        List<LibraryModel> models = api.listModelsFromLibrary();
-        assertNotNull(models);
-        assertFalse(models.isEmpty());
-    }
-
    @Test
    @Order(3)
    void testPullModelAPI() throws URISyntaxException, IOException, OllamaBaseException, InterruptedException {
@ -144,8 +135,10 @@ class OllamaAPIIntegrationTest {
    @Order(5)
    void testEmbeddings() throws Exception {
        api.pullModel(EMBEDDING_MODEL);
-        OllamaEmbedResponseModel embeddings = api.embed(EMBEDDING_MODEL,
-                Arrays.asList("Why is the sky blue?", "Why is the grass green?"));
+        OllamaEmbedRequestModel m = new OllamaEmbedRequestModel();
+        m.setModel(EMBEDDING_MODEL);
+        m.setInput(Arrays.asList("Why is the sky blue?", "Why is the grass green?"));
+        OllamaEmbedResponseModel embeddings = api.embed(m);
        assertNotNull(embeddings, "Embeddings should not be null");
        assertFalse(embeddings.getEmbeddings().isEmpty(), "Embeddings should not be empty");
    }
@ -228,7 +221,7 @@ class OllamaAPIIntegrationTest {
        requestModel = builder.withMessages(requestModel.getMessages())
                .withMessage(OllamaChatMessageRole.USER, "Give me a cool name")
                .withOptions(new OptionsBuilder().setTemperature(0.5f).build()).build();
-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);

        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
@ -249,7 +242,7 @@ class OllamaAPIIntegrationTest {
                        expectedResponse)).withMessage(OllamaChatMessageRole.USER, "Who are you?")
                .withOptions(new OptionsBuilder().setTemperature(0.0f).build()).build();

-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);
        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
        assertNotNull(chatResult.getResponseModel().getMessage());
@ -270,7 +263,7 @@ class OllamaAPIIntegrationTest {
                .build();

        // Start conversation with model
-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);

        assertTrue(chatResult.getChatHistory().stream().anyMatch(chat -> chat.getContent().contains("2")),
                "Expected chat history to contain '2'");
@ -279,7 +272,7 @@ class OllamaAPIIntegrationTest {
                .withMessage(OllamaChatMessageRole.USER, "And what is its squared value?").build();

        // Continue conversation with model
-        chatResult = api.chat(requestModel);
+        chatResult = api.chat(requestModel, null);

        assertTrue(chatResult.getChatHistory().stream().anyMatch(chat -> chat.getContent().contains("4")),
                "Expected chat history to contain '4'");
@ -289,7 +282,7 @@ class OllamaAPIIntegrationTest {
                "What is the largest value between 2, 4 and 6?").build();

        // Continue conversation with the model for the third question
-        chatResult = api.chat(requestModel);
+        chatResult = api.chat(requestModel, null);

        // verify the result
        assertNotNull(chatResult, "Chat result should not be null");
@ -315,7 +308,7 @@ class OllamaAPIIntegrationTest {
                "Give me the ID and address of the employee Rahul Kumar.").build();
        requestModel.setOptions(new OptionsBuilder().setTemperature(0.9f).build().getOptionsMap());

-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);

        assertNotNull(chatResult, "chatResult should not be null");
        assertNotNull(chatResult.getResponseModel(), "Response model should not be null");
@ -357,7 +350,7 @@ class OllamaAPIIntegrationTest {
                    .build();
            requestModel.setOptions(new OptionsBuilder().setTemperature(0.9f).build().getOptionsMap());

-            OllamaChatResult chatResult = api.chat(requestModel);
+            OllamaChatResult chatResult = api.chat(requestModel, null);

            assertNotNull(chatResult, "chatResult should not be null");
            assertNotNull(chatResult.getResponseModel(), "Response model should not be null");
@ -405,11 +398,11 @@ class OllamaAPIIntegrationTest {
                .withKeepAlive("0m").withOptions(new OptionsBuilder().setTemperature(0.9f).build())
                .build();

-        OllamaChatResult chatResult = api.chat(requestModel, (s) -> {
+        OllamaChatResult chatResult = api.chat(requestModel, new OllamaChatStreamObserver((s) -> {
            LOG.info(s.toUpperCase());
        }, (s) -> {
            LOG.info(s.toLowerCase());
-        });
+        }));

        assertNotNull(chatResult, "chatResult should not be null");
        assertNotNull(chatResult.getResponseModel(), "Response model should not be null");
@ -447,7 +440,7 @@ class OllamaAPIIntegrationTest {
                        "Compute the most important constant in the world using 5 digits")
                .build();

-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);
        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
        assertNotNull(chatResult.getResponseModel().getMessage());
@ -480,7 +473,7 @@ class OllamaAPIIntegrationTest {
                        "Greet Rahul with a lot of hearts and respond to me with count of emojis that have been in used in the greeting")
                .build();

-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);
        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
        assertNotNull(chatResult.getResponseModel().getMessage());
@ -515,13 +508,11 @@ class OllamaAPIIntegrationTest {
        requestModel.setThink(false);
        StringBuffer sb = new StringBuffer();

-        OllamaChatResult chatResult = api.chat(requestModel, (s) -> {
+        OllamaChatResult chatResult = api.chat(requestModel, new OllamaChatStreamObserver((s) -> {
            LOG.info(s.toUpperCase());
-            sb.append(s);
        }, (s) -> {
            LOG.info(s.toLowerCase());
-            sb.append(s);
-        });
+        }));
        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
        assertNotNull(chatResult.getResponseModel().getMessage());
@ -540,13 +531,11 @@ class OllamaAPIIntegrationTest {
                .withThinking(true).withKeepAlive("0m").build();
        StringBuffer sb = new StringBuffer();

-        OllamaChatResult chatResult = api.chat(requestModel, (s) -> {
-            sb.append(s);
+        OllamaChatResult chatResult = api.chat(requestModel, new OllamaChatStreamObserver((s) -> {
            LOG.info(s.toUpperCase());
        }, (s) -> {
-            sb.append(s);
            LOG.info(s.toLowerCase());
-        });
+        }));

        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
@ -569,7 +558,7 @@ class OllamaAPIIntegrationTest {
                .build();
        api.registerAnnotatedTools(new OllamaAPIIntegrationTest());

-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);
        assertNotNull(chatResult);
    }

@ -583,7 +572,7 @@ class OllamaAPIIntegrationTest {
                "What's in the picture?", Collections.emptyList(),
                List.of(getImageFileFromClasspath("emoji-smile.jpeg"))).build();

-        OllamaChatResult chatResult = api.chat(requestModel);
+        OllamaChatResult chatResult = api.chat(requestModel, null);
        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
        builder.reset();
@ -591,7 +580,7 @@ class OllamaAPIIntegrationTest {
        requestModel = builder.withMessages(chatResult.getChatHistory())
                .withMessage(OllamaChatMessageRole.USER, "What's the color?").build();

-        chatResult = api.chat(requestModel);
+        chatResult = api.chat(requestModel, null);
        assertNotNull(chatResult);
        assertNotNull(chatResult.getResponseModel());
    }
--- a/src/test/java/io/github/ollama4j/unittests/TestAuth.java
+++ b/src/test/java/io/github/ollama4j/unittests/TestAuth.java
@ -4,7 +4,8 @@ import io.github.ollama4j.models.request.BasicAuth;
 import io.github.ollama4j.models.request.BearerAuth;
 import org.junit.jupiter.api.Test;

-import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 class TestAuth {

--- a/src/test/java/io/github/ollama4j/unittests/TestMockedAPIs.java
+++ b/src/test/java/io/github/ollama4j/unittests/TestMockedAPIs.java
@ -96,9 +96,12 @@ class TestMockedAPIs {
        String model = OllamaModelType.LLAMA2;
        String prompt = "some prompt text";
        try {
-            when(ollamaAPI.generateEmbeddings(model, prompt)).thenReturn(new ArrayList<>());
-            ollamaAPI.generateEmbeddings(model, prompt);
-            verify(ollamaAPI, times(1)).generateEmbeddings(model, prompt);
+            OllamaEmbedRequestModel m = new OllamaEmbedRequestModel();
+            m.setModel(model);
+            m.setInput(List.of(prompt));
+            when(ollamaAPI.embed(m)).thenReturn(new OllamaEmbedResponseModel());
+            ollamaAPI.embed(m);
+            verify(ollamaAPI, times(1)).embed(m);
        } catch (IOException | OllamaBaseException | InterruptedException e) {
            throw new RuntimeException(e);
        }
@ -110,9 +113,10 @@ class TestMockedAPIs {
        String model = OllamaModelType.LLAMA2;
        List<String> inputs = List.of("some prompt text");
        try {
-            when(ollamaAPI.embed(model, inputs)).thenReturn(new OllamaEmbedResponseModel());
-            ollamaAPI.embed(model, inputs);
-            verify(ollamaAPI, times(1)).embed(model, inputs);
+            OllamaEmbedRequestModel m = new OllamaEmbedRequestModel(model, inputs);
+            when(ollamaAPI.embed(m)).thenReturn(new OllamaEmbedResponseModel());
+            ollamaAPI.embed(m);
+            verify(ollamaAPI, times(1)).embed(m);
        } catch (IOException | OllamaBaseException | InterruptedException e) {
            throw new RuntimeException(e);
        }
--- a/src/test/java/io/github/ollama4j/unittests/TestOllamaChatMessage.java
+++ b/src/test/java/io/github/ollama4j/unittests/TestOllamaChatMessage.java
@ -5,7 +5,8 @@ import io.github.ollama4j.models.chat.OllamaChatMessageRole;
 import org.json.JSONObject;
 import org.junit.jupiter.api.Test;

-import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 class TestOllamaChatMessage {

--- a/src/test/java/io/github/ollama4j/unittests/TestToolsPromptBuilder.java
+++ b/src/test/java/io/github/ollama4j/unittests/TestToolsPromptBuilder.java
@ -7,7 +7,7 @@ import org.junit.jupiter.api.Test;
 import java.util.List;
 import java.util.Map;

-import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 class TestToolsPromptBuilder {