Updated generateAsync() API

Signed-off-by: koujalgi.amith@gmail.com <koujalgi.amith@gmail.com>
koujalgi.amith@gmail.com
2024-07-13 23:54:49 +05:30
parent af1b213a76
commit 383d0f56ca
10 changed files with 208 additions and 191 deletions

View File

@@ -360,15 +360,15 @@ public class OllamaAPI {
   }

   /**
-   * Convenience method to call Ollama API without streaming responses.
+   * Generates response using the specified AI model and prompt (in blocking mode).
    * <p>
    * Uses {@link #generate(String, String, boolean, Options, OllamaStreamHandler)}
    *
-   * @param model Model to use
-   * @param prompt Prompt text
+   * @param model The name or identifier of the AI model to use for generating the response.
+   * @param prompt The input text or prompt to provide to the AI model.
    * @param raw In some cases, you may wish to bypass the templating system and provide a full prompt. In this case, you can use the raw parameter to disable templating. Also note that raw mode will not return a context.
-   * @param options Additional Options
-   * @return OllamaResult
+   * @param options Additional options or configurations to use when generating the response.
+   * @return {@link OllamaResult}
    */
   public OllamaResult generate(String model, String prompt, boolean raw, Options options)
       throws OllamaBaseException, IOException, InterruptedException {
@@ -376,6 +376,20 @@ public class OllamaAPI {
   }

+  /**
+   * Generates response using the specified AI model and prompt (in blocking mode), and then invokes a set of tools
+   * on the generated response.
+   *
+   * @param model The name or identifier of the AI model to use for generating the response.
+   * @param prompt The input text or prompt to provide to the AI model.
+   * @param raw In some cases, you may wish to bypass the templating system and provide a full prompt. In this case, you can use the raw parameter to disable templating. Also note that raw mode will not return a context.
+   * @param options Additional options or configurations to use when generating the response.
+   * @return {@link OllamaToolsResult} An OllamaToolsResult object containing the response from the AI model and the results of invoking the tools on that output.
+   * @throws OllamaBaseException If there is an error related to the Ollama API or service.
+   * @throws IOException If there is an error related to input/output operations.
+   * @throws InterruptedException If the method is interrupted while waiting for the AI model
+   *                              to generate the response or for the tools to be invoked.
+   */
   public OllamaToolsResult generateWithTools(String model, String prompt, boolean raw, Options options)
       throws OllamaBaseException, IOException, InterruptedException {
     OllamaToolsResult toolResult = new OllamaToolsResult();
@@ -402,15 +416,15 @@ public class OllamaAPI {
    * @param prompt the prompt/question text
    * @return the ollama async result callback handle
    */
-  public OllamaAsyncResultCallback generateAsync(String model, String prompt, boolean raw) {
+  public OllamaAsyncResultStreamer generateAsync(String model, String prompt, boolean raw) {
     OllamaGenerateRequestModel ollamaRequestModel = new OllamaGenerateRequestModel(model, prompt);
     ollamaRequestModel.setRaw(raw);
     URI uri = URI.create(this.host + "/api/generate");
-    OllamaAsyncResultCallback ollamaAsyncResultCallback =
-        new OllamaAsyncResultCallback(
+    OllamaAsyncResultStreamer ollamaAsyncResultStreamer =
+        new OllamaAsyncResultStreamer(
             getRequestBuilderDefault(uri), ollamaRequestModel, requestTimeoutSeconds);
-    ollamaAsyncResultCallback.start();
-    return ollamaAsyncResultCallback;
+    ollamaAsyncResultStreamer.start();
+    return ollamaAsyncResultStreamer;
   }

   /**
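For orientation, a minimal consumer sketch of the renamed async entry point. The host, model, prompt, and poll interval are illustrative assumptions, not part of this commit; the accessors follow from the diff itself (the streamer is a started Thread, and Lombok's @Data generates getStream()).

import io.github.amithkoujalgi.ollama4j.core.OllamaAPI;
import io.github.amithkoujalgi.ollama4j.core.models.OllamaAsyncResultStreamer;

public class GenerateAsyncExample {
  public static void main(String[] args) throws InterruptedException {
    OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434"); // assumed host
    OllamaAsyncResultStreamer streamer =
        ollamaAPI.generateAsync("llama2", "Why is the sky blue?", false); // assumed model/prompt
    // generateAsync() starts the request thread; tokens queue up while it runs,
    // and poll() drains everything received since the last call.
    while (streamer.isAlive()) {
      System.out.print(streamer.getStream().poll());
      Thread.sleep(250); // assumed poll interval
    }
    System.out.print(streamer.getStream().poll()); // drain any trailing tokens
  }
}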
@@ -508,7 +522,7 @@ public class OllamaAPI {
    * Hint: the OllamaChatRequestModel#getStream() property is not implemented.
    *
    * @param request request object to be sent to the server
-   * @return
+   * @return {@link OllamaChatResult}
    * @throws OllamaBaseException any response code other than 200 has been returned
    * @throws IOException in case the responseStream cannot be read
    * @throws InterruptedException in case the server is not reachable or network issues happen
@@ -524,7 +538,7 @@ public class OllamaAPI {
    *
    * @param request request object to be sent to the server
    * @param streamHandler callback handler to handle the last message from stream (caution: all previous messages from stream will be concatenated)
-   * @return
+   * @return {@link OllamaChatResult}
    * @throws OllamaBaseException any response code other than 200 has been returned
    * @throws IOException in case the responseStream cannot be read
    * @throws InterruptedException in case the server is not reachable or network issues happen
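For context, a sketch of how the chat entry point documented above is typically invoked. The request-builder classes (OllamaChatRequestBuilder, OllamaChatMessageRole) and their package locations are assumptions from the library's public documentation and do not appear in this diff:

import io.github.amithkoujalgi.ollama4j.core.OllamaAPI;
import io.github.amithkoujalgi.ollama4j.core.models.chat.OllamaChatMessageRole;
import io.github.amithkoujalgi.ollama4j.core.models.chat.OllamaChatRequestBuilder;
import io.github.amithkoujalgi.ollama4j.core.models.chat.OllamaChatRequestModel;
import io.github.amithkoujalgi.ollama4j.core.models.chat.OllamaChatResult;

public class ChatExample {
  public static void main(String[] args) throws Exception {
    OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434"); // assumed host
    OllamaChatRequestModel request =
        OllamaChatRequestBuilder.getInstance("llama2") // assumed model
            .withMessage(OllamaChatMessageRole.USER, "Why is the sky blue?")
            .build();
    OllamaChatResult chatResult = ollamaAPI.chat(request); // blocking call
    System.out.println(chatResult.getResponse());
  }
}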
@@ -541,6 +555,10 @@ public class OllamaAPI {
     return new OllamaChatResult(result.getResponse(), result.getResponseTime(), result.getHttpStatusCode(), request.getMessages());
   }

+  public void registerTool(MistralTools.ToolSpecification toolSpecification) {
+    ToolRegistry.addFunction(toolSpecification.getFunctionName(), toolSpecification.getToolDefinition());
+  }
+
   // technical private methods //

   private static String encodeFileToBase64(File file) throws IOException {
private static String encodeFileToBase64(File file) throws IOException {
@@ -603,10 +621,6 @@ public class OllamaAPI {
   }

-  public void registerTool(MistralTools.ToolSpecification toolSpecification) {
-    ToolRegistry.addFunction(toolSpecification.getFunctionName(), toolSpecification.getToolDefinition());
-  }
-
   private Object invokeTool(ToolDef toolDef) {
     try {
       String methodName = toolDef.getName();
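The relocated registerTool() pairs with the generateWithTools() method documented earlier in this file. A sketch of the intended flow, with host, model, and prompt as assumptions; constructing a MistralTools.ToolSpecification is elided because its builder is not shown in this diff:

import io.github.amithkoujalgi.ollama4j.core.OllamaAPI;
import io.github.amithkoujalgi.ollama4j.core.models.OllamaToolsResult;
import io.github.amithkoujalgi.ollama4j.core.utils.OptionsBuilder;

public class GenerateWithToolsExample {
  public static void main(String[] args) throws Exception {
    OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434"); // assumed host
    // ollamaAPI.registerTool(someToolSpecification); // registration shape elided here
    OllamaToolsResult result =
        ollamaAPI.generateWithTools(
            "mistral", // assumed model
            "What is the weather in Bengaluru?", // assumed prompt
            false,
            new OptionsBuilder().build());
    // The result bundles the model response and the outputs of any invoked tools.
    System.out.println(result);
  }
}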

View File

@@ -0,0 +1,18 @@
package io.github.amithkoujalgi.ollama4j.core;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;

public class OllamaResultStream extends LinkedList<String> implements Queue<String> {
  @Override
  public String poll() {
    StringBuilder tokens = new StringBuilder();
    Iterator<String> iterator = this.listIterator();
    while (iterator.hasNext()) {
      tokens.append(iterator.next());
      iterator.remove();
    }
    return tokens.toString();
  }
}
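Note that this poll() deviates from the usual Queue contract: rather than removing and returning only the head element, it drains the whole queue and returns the concatenation, and it yields an empty string (not null) when nothing is queued. A small self-contained illustration:

import io.github.amithkoujalgi.ollama4j.core.OllamaResultStream;

public class ResultStreamDemo {
  public static void main(String[] args) {
    OllamaResultStream stream = new OllamaResultStream();
    stream.add("Hello");
    stream.add(", ");
    stream.add("world");
    System.out.println(stream.poll());           // "Hello, world" -- queue is now empty
    System.out.println(stream.poll().isEmpty()); // true: returns "", not null
  }
}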

View File

@@ -1,143 +0,0 @@
package io.github.amithkoujalgi.ollama4j.core.models;
import io.github.amithkoujalgi.ollama4j.core.exceptions.OllamaBaseException;
import io.github.amithkoujalgi.ollama4j.core.models.generate.OllamaGenerateRequestModel;
import io.github.amithkoujalgi.ollama4j.core.models.generate.OllamaGenerateResponseModel;
import io.github.amithkoujalgi.ollama4j.core.utils.Utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.LinkedList;
import java.util.Queue;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
@Data
@EqualsAndHashCode(callSuper = true)
@SuppressWarnings("unused")
public class OllamaAsyncResultCallback extends Thread {
private final HttpRequest.Builder requestBuilder;
private final OllamaGenerateRequestModel ollamaRequestModel;
private final Queue<String> queue = new LinkedList<>();
private String result;
private boolean isDone;
/**
* -- GETTER -- Returns the status of the request. Indicates if the request was successful or a
* failure. If the request was a failure, the `getResponse()` method will return the error
* message.
*/
@Getter private boolean succeeded;
private long requestTimeoutSeconds;
/**
* -- GETTER -- Returns the HTTP response status code for the request that was made to Ollama
* server.
*/
@Getter private int httpStatusCode;
/** -- GETTER -- Returns the response time in milliseconds. */
@Getter private long responseTime = 0;
public OllamaAsyncResultCallback(
HttpRequest.Builder requestBuilder,
OllamaGenerateRequestModel ollamaRequestModel,
long requestTimeoutSeconds) {
this.requestBuilder = requestBuilder;
this.ollamaRequestModel = ollamaRequestModel;
this.isDone = false;
this.result = "";
this.queue.add("");
this.requestTimeoutSeconds = requestTimeoutSeconds;
}
@Override
public void run() {
HttpClient httpClient = HttpClient.newHttpClient();
try {
long startTime = System.currentTimeMillis();
HttpRequest request =
requestBuilder
.POST(
HttpRequest.BodyPublishers.ofString(
Utils.getObjectMapper().writeValueAsString(ollamaRequestModel)))
.header("Content-Type", "application/json")
.timeout(Duration.ofSeconds(requestTimeoutSeconds))
.build();
HttpResponse<InputStream> response =
httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
int statusCode = response.statusCode();
this.httpStatusCode = statusCode;
InputStream responseBodyStream = response.body();
try (BufferedReader reader =
new BufferedReader(new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8))) {
String line;
StringBuilder responseBuffer = new StringBuilder();
while ((line = reader.readLine()) != null) {
if (statusCode == 404) {
OllamaErrorResponseModel ollamaResponseModel =
Utils.getObjectMapper().readValue(line, OllamaErrorResponseModel.class);
queue.add(ollamaResponseModel.getError());
responseBuffer.append(ollamaResponseModel.getError());
} else {
OllamaGenerateResponseModel ollamaResponseModel =
Utils.getObjectMapper().readValue(line, OllamaGenerateResponseModel.class);
queue.add(ollamaResponseModel.getResponse());
if (!ollamaResponseModel.isDone()) {
responseBuffer.append(ollamaResponseModel.getResponse());
}
}
}
this.isDone = true;
this.succeeded = true;
this.result = responseBuffer.toString();
long endTime = System.currentTimeMillis();
responseTime = endTime - startTime;
}
if (statusCode != 200) {
throw new OllamaBaseException(this.result);
}
} catch (IOException | InterruptedException | OllamaBaseException e) {
this.isDone = true;
this.succeeded = false;
this.result = "[FAILED] " + e.getMessage();
}
}
/**
* Returns the status of the thread. This does not indicate that the request was successful or a
* failure, rather it is just a status flag to indicate if the thread is active or ended.
*
* @return boolean - status
*/
public boolean isComplete() {
return isDone;
}
/**
* Returns the final completion/response when the execution completes. Does not return intermediate results.
*
* @return String completion/response text
*/
public String getResponse() {
return result;
}
public Queue<String> getStream() {
return queue;
}
public void setRequestTimeoutSeconds(long requestTimeoutSeconds) {
this.requestTimeoutSeconds = requestTimeoutSeconds;
}
}

View File

@@ -0,0 +1,124 @@
package io.github.amithkoujalgi.ollama4j.core.models;
import io.github.amithkoujalgi.ollama4j.core.OllamaResultStream;
import io.github.amithkoujalgi.ollama4j.core.exceptions.OllamaBaseException;
import io.github.amithkoujalgi.ollama4j.core.models.generate.OllamaGenerateRequestModel;
import io.github.amithkoujalgi.ollama4j.core.models.generate.OllamaGenerateResponseModel;
import io.github.amithkoujalgi.ollama4j.core.utils.Utils;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
@Data
@EqualsAndHashCode(callSuper = true)
@SuppressWarnings("unused")
public class OllamaAsyncResultStreamer extends Thread {
private final HttpRequest.Builder requestBuilder;
private final OllamaGenerateRequestModel ollamaRequestModel;
private final OllamaResultStream stream = new OllamaResultStream();
private String completeResponse;
/**
 * -- GETTER -- Returns the status of the request. Indicates if the request was successful or a
 * failure. If the request was a failure, the `getCompleteResponse()` method will return the error
 * message.
 */
@Getter
private boolean succeeded;
@Setter
private long requestTimeoutSeconds;
/**
* -- GETTER -- Returns the HTTP response status code for the request that was made to Ollama
* server.
*/
@Getter
private int httpStatusCode;
/**
* -- GETTER -- Returns the response time in milliseconds.
*/
@Getter
private long responseTime = 0;
public OllamaAsyncResultStreamer(
HttpRequest.Builder requestBuilder,
OllamaGenerateRequestModel ollamaRequestModel,
long requestTimeoutSeconds) {
this.requestBuilder = requestBuilder;
this.ollamaRequestModel = ollamaRequestModel;
this.completeResponse = "";
this.stream.add("");
this.requestTimeoutSeconds = requestTimeoutSeconds;
}
@Override
public void run() {
ollamaRequestModel.setStream(true);
HttpClient httpClient = HttpClient.newHttpClient();
try {
long startTime = System.currentTimeMillis();
HttpRequest request =
requestBuilder
.POST(
HttpRequest.BodyPublishers.ofString(
Utils.getObjectMapper().writeValueAsString(ollamaRequestModel)))
.header("Content-Type", "application/json")
.timeout(Duration.ofSeconds(requestTimeoutSeconds))
.build();
HttpResponse<InputStream> response =
httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
int statusCode = response.statusCode();
this.httpStatusCode = statusCode;
InputStream responseBodyStream = response.body();
try (BufferedReader reader =
new BufferedReader(new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8))) {
String line;
StringBuilder responseBuffer = new StringBuilder();
while ((line = reader.readLine()) != null) {
if (statusCode == 404) {
OllamaErrorResponseModel ollamaResponseModel =
Utils.getObjectMapper().readValue(line, OllamaErrorResponseModel.class);
stream.add(ollamaResponseModel.getError());
responseBuffer.append(ollamaResponseModel.getError());
} else {
OllamaGenerateResponseModel ollamaResponseModel =
Utils.getObjectMapper().readValue(line, OllamaGenerateResponseModel.class);
String res = ollamaResponseModel.getResponse();
stream.add(res);
if (!ollamaResponseModel.isDone()) {
responseBuffer.append(res);
}
}
}
this.succeeded = true;
this.completeResponse = responseBuffer.toString();
long endTime = System.currentTimeMillis();
responseTime = endTime - startTime;
}
if (statusCode != 200) {
throw new OllamaBaseException(this.completeResponse);
}
} catch (IOException | InterruptedException | OllamaBaseException e) {
this.succeeded = false;
this.completeResponse = "[FAILED] " + e.getMessage();
}
}
}
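Besides incremental polling, the streamer can be consumed in a blocking fashion. A sketch using the accessors implied by the annotations above (isSucceeded() and getHttpStatusCode() from the explicit @Getter fields, getCompleteResponse() from @Data) plus the inherited Thread.join(); host and model are assumptions:

import io.github.amithkoujalgi.ollama4j.core.OllamaAPI;
import io.github.amithkoujalgi.ollama4j.core.models.OllamaAsyncResultStreamer;

public class BlockingConsumerExample {
  public static void main(String[] args) throws InterruptedException {
    OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434"); // assumed host
    OllamaAsyncResultStreamer streamer =
        ollamaAPI.generateAsync("llama2", "some prompt text", false); // assumed model
    streamer.join(); // wait for the request thread to finish
    if (streamer.isSucceeded()) {
      System.out.println(streamer.getCompleteResponse());
    } else {
      // On failure, run() above stores "[FAILED] <message>" in completeResponse.
      System.out.println("HTTP " + streamer.getHttpStatusCode()
          + ": " + streamer.getCompleteResponse());
    }
  }
}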

View File

@@ -3,7 +3,7 @@ package io.github.amithkoujalgi.ollama4j.unittests;
 import io.github.amithkoujalgi.ollama4j.core.OllamaAPI;
 import io.github.amithkoujalgi.ollama4j.core.exceptions.OllamaBaseException;
 import io.github.amithkoujalgi.ollama4j.core.models.ModelDetail;
-import io.github.amithkoujalgi.ollama4j.core.models.OllamaAsyncResultCallback;
+import io.github.amithkoujalgi.ollama4j.core.models.OllamaAsyncResultStreamer;
 import io.github.amithkoujalgi.ollama4j.core.models.OllamaResult;
 import io.github.amithkoujalgi.ollama4j.core.types.OllamaModelType;
 import io.github.amithkoujalgi.ollama4j.core.utils.OptionsBuilder;
@@ -157,7 +157,7 @@ class TestMockedAPIs {
     String model = OllamaModelType.LLAMA2;
     String prompt = "some prompt text";
     when(ollamaAPI.generateAsync(model, prompt, false))
-        .thenReturn(new OllamaAsyncResultCallback(null, null, 3));
+        .thenReturn(new OllamaAsyncResultStreamer(null, null, 3));
     ollamaAPI.generateAsync(model, prompt, false);
     verify(ollamaAPI, times(1)).generateAsync(model, prompt, false);
   }