mirror of
https://github.com/amithkoujalgi/ollama4j.git
synced 2025-09-16 03:39:05 +02:00
Enhance OllamaAPI with improved timeout and retry mechanisms
- Updated request timeout default to 10 seconds for API calls.
- Added verbose logging option with default set to true.
- Introduced maxChatToolCallRetries to control retry attempts during chat interactions.
- Implemented numberOfRetriesForModelPull with exponential backoff for model retrieval failures.
- Refactored pullModel method to include retry logic and improved error handling.
This commit is contained in:
parent
ab9b95dbed
commit
ad1bf658a9
@ -56,33 +56,50 @@ import java.util.stream.Collectors;
|
|||||||
public class OllamaAPI {
|
public class OllamaAPI {
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(OllamaAPI.class);
|
private static final Logger logger = LoggerFactory.getLogger(OllamaAPI.class);
|
||||||
|
|
||||||
private final String host;
|
private final String host;
|
||||||
|
private Auth auth;
|
||||||
|
private final ToolRegistry toolRegistry = new ToolRegistry();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* -- SETTER --
|
* The request timeout in seconds for API calls.
|
||||||
* Set request timeout in seconds. Default is 3 seconds.
|
* <p>
|
||||||
|
* Default is 10 seconds. This value determines how long the client will wait for a response
|
||||||
|
* from the Ollama server before timing out.
|
||||||
*/
|
*/
|
||||||
@Setter
|
@Setter
|
||||||
private long requestTimeoutSeconds = 10;
|
private long requestTimeoutSeconds = 10;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* -- SETTER --
|
* Enables or disables verbose logging of responses.
|
||||||
* Set/unset logging of responses
|
* <p>
|
||||||
|
* If set to {@code true}, the API will log detailed information about requests and responses.
|
||||||
|
* Default is {@code true}.
|
||||||
*/
|
*/
|
||||||
@Setter
|
@Setter
|
||||||
private boolean verbose = true;
|
private boolean verbose = true;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum number of retries for tool calls during chat interactions.
|
||||||
|
* <p>
|
||||||
|
* This value controls how many times the API will attempt to call a tool in the event of a failure.
|
||||||
|
* Default is 3.
|
||||||
|
*/
|
||||||
@Setter
|
@Setter
|
||||||
private int maxChatToolCallRetries = 3;
|
private int maxChatToolCallRetries = 3;
|
||||||
|
|
||||||
private Auth auth;
|
/**
|
||||||
|
* The number of retries to attempt when pulling a model from the Ollama server.
|
||||||
|
* <p>
|
||||||
|
* If set to 0, the model is pulled once and no retries are performed. If greater than 0, the API will attempt
|
||||||
|
* to pull the model at most the specified number of times in total in case of failure.
|
||||||
|
* <p>
|
||||||
|
* Default is 0 (no retries).
|
||||||
|
*/
|
||||||
|
@Setter
|
||||||
|
@SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
|
||||||
private int numberOfRetriesForModelPull = 0;
|
private int numberOfRetriesForModelPull = 0;
|
||||||
|
|
||||||
public void setNumberOfRetriesForModelPull(int numberOfRetriesForModelPull) {
|
|
||||||
this.numberOfRetriesForModelPull = numberOfRetriesForModelPull;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final ToolRegistry toolRegistry = new ToolRegistry();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates the Ollama API with default Ollama host:
|
* Instantiates the Ollama API with default Ollama host:
|
||||||
* <a href="http://localhost:11434">http://localhost:11434</a>
|
* <a href="http://localhost:11434">http://localhost:11434</a>
|
||||||
@ -350,36 +367,58 @@ public class OllamaAPI {
|
|||||||
List<LibraryModel> libraryModels = this.listModelsFromLibrary();
|
List<LibraryModel> libraryModels = this.listModelsFromLibrary();
|
||||||
LibraryModel libraryModel = libraryModels.stream().filter(model -> model.getName().equals(modelName)).findFirst().orElseThrow(() -> new NoSuchElementException(String.format("Model by name '%s' not found", modelName)));
|
LibraryModel libraryModel = libraryModels.stream().filter(model -> model.getName().equals(modelName)).findFirst().orElseThrow(() -> new NoSuchElementException(String.format("Model by name '%s' not found", modelName)));
|
||||||
LibraryModelDetail libraryModelDetail = this.getLibraryModelDetails(libraryModel);
|
LibraryModelDetail libraryModelDetail = this.getLibraryModelDetails(libraryModel);
|
||||||
LibraryModelTag libraryModelTag = libraryModelDetail.getTags().stream().filter(tagName -> tagName.getTag().equals(tag)).findFirst().orElseThrow(() -> new NoSuchElementException(String.format("Tag '%s' for model '%s' not found", tag, modelName)));
|
return libraryModelDetail.getTags().stream().filter(tagName -> tagName.getTag().equals(tag)).findFirst().orElseThrow(() -> new NoSuchElementException(String.format("Tag '%s' for model '%s' not found", tag, modelName)));
|
||||||
return libraryModelTag;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pull a model on the Ollama server from the list of <a
|
* Pull a model on the Ollama server from the list of <a
|
||||||
* href="https://ollama.ai/library">available models</a>.
|
* href="https://ollama.ai/library">available models</a>.
|
||||||
|
* <p>
|
||||||
|
* If {@code numberOfRetriesForModelPull} is greater than 0, this method will attempt to pull the model
|
||||||
|
* at most that many times in total, retrying whenever an {@link OllamaBaseException} occurs and using exponential backoff
|
||||||
|
* between retries (delay doubles after each failed attempt, starting at 1 second).
|
||||||
|
* <p>
|
||||||
|
* The backoff is only applied between retries, not after the final attempt.
|
||||||
*
|
*
|
||||||
* @param modelName the name of the model
|
* @param modelName the name of the model
|
||||||
* @throws OllamaBaseException if the response indicates an error status
|
* @throws OllamaBaseException if the response indicates an error status or all retries fail
|
||||||
* @throws IOException if an I/O error occurs during the HTTP request
|
* @throws IOException if an I/O error occurs during the HTTP request
|
||||||
* @throws InterruptedException if the operation is interrupted
|
* @throws InterruptedException if the operation is interrupted or the thread is interrupted during backoff
|
||||||
* @throws URISyntaxException if the URI for the request is malformed
|
* @throws URISyntaxException if the URI for the request is malformed
|
||||||
*/
|
*/
|
||||||
public void pullModel(String modelName) throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
|
public void pullModel(String modelName) throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
|
||||||
if (numberOfRetriesForModelPull == 0) {
|
if (numberOfRetriesForModelPull == 0) {
|
||||||
this.doPullModel(modelName);
|
this.doPullModel(modelName);
|
||||||
} else {
|
return;
|
||||||
|
}
|
||||||
int numberOfRetries = 0;
|
int numberOfRetries = 0;
|
||||||
|
long baseDelayMillis = 1000L; // 1 second base delay
|
||||||
while (numberOfRetries < numberOfRetriesForModelPull) {
|
while (numberOfRetries < numberOfRetriesForModelPull) {
|
||||||
try {
|
try {
|
||||||
this.doPullModel(modelName);
|
this.doPullModel(modelName);
|
||||||
return;
|
return;
|
||||||
} catch (OllamaBaseException e) {
|
} catch (OllamaBaseException e) {
|
||||||
logger.error("Failed to pull model " + modelName + ", retrying...");
|
handlePullRetry(modelName, numberOfRetries, numberOfRetriesForModelPull, baseDelayMillis);
|
||||||
numberOfRetries++;
|
numberOfRetries++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new OllamaBaseException("Failed to pull model " + modelName + " after " + numberOfRetriesForModelPull + " retries");
|
throw new OllamaBaseException("Failed to pull model " + modelName + " after " + numberOfRetriesForModelPull + " retries");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handles retry logic for pullModel, including logging and backoff.
|
||||||
|
*/
|
||||||
|
private void handlePullRetry(String modelName, int currentRetry, int maxRetries, long baseDelayMillis) throws InterruptedException {
|
||||||
|
logger.error("Failed to pull model {}, retrying... (attempt {}/{})", modelName, currentRetry + 1, maxRetries);
|
||||||
|
if (currentRetry + 1 < maxRetries) {
|
||||||
|
long backoffMillis = baseDelayMillis * (1L << currentRetry);
|
||||||
|
try {
|
||||||
|
Thread.sleep(backoffMillis);
|
||||||
|
} catch (InterruptedException ie) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw ie;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doPullModel(String modelName) throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
|
private void doPullModel(String modelName) throws OllamaBaseException, IOException, URISyntaxException, InterruptedException {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user