Add Prometheus metrics integration and refactor API error handling

Introduces Prometheus metrics support with a new MetricsRecorder and documentation (METRICS.md). Refactors OllamaAPI methods to improve error handling, reduce checked exceptions, and record metrics for API calls. Updates dependencies in pom.xml to include Prometheus and Guava. Adds MetricsRecorder class and updates tests for metrics integration.
This commit is contained in:
amithkoujalgi
2025-09-23 16:51:26 +05:30
parent a9f6d4671c
commit 827bedb696
9 changed files with 1120 additions and 511 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -10,7 +10,11 @@ package io.github.ollama4j.exceptions;
/**
 * Base checked exception for all errors raised by the Ollama4j API layer.
 *
 * <p>Provides a message-only constructor and a message-plus-cause constructor so
 * callers can wrap lower-level failures (I/O, interruption, parsing) without
 * losing the original stack trace.
 */
public class OllamaBaseException extends Exception {

    /**
     * Creates an exception with a descriptive message.
     *
     * @param message human-readable description of the failure
     */
    public OllamaBaseException(String message) {
        super(message);
    }

    /**
     * Creates an exception with a descriptive message and the underlying cause.
     *
     * @param message human-readable description of the failure
     * @param exception the underlying exception being wrapped; preserved as the cause
     */
    public OllamaBaseException(String message, Exception exception) {
        super(message, exception);
    }
}

View File

@@ -0,0 +1,127 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.metrics;
import com.google.common.base.Throwables;
import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
import java.util.Map;
/**
 * Records Prometheus metrics (request count, latency, response size) for Ollama API calls.
 *
 * <p>All metrics are registered once in the default Prometheus registry via static
 * initializers. The order of {@code labelNames(...)} below defines the positional order
 * that every corresponding {@code labels(...)} call must follow exactly — Prometheus
 * matches label values by position, not by name.
 */
public class MetricsRecorder {

    // Labels, in positional order: endpoint, status, model, raw, streaming, format,
    // thinking, http_status, options.
    private static final Counter requests =
            Counter.build()
                    .name("ollama_api_requests_total")
                    .help("Total requests to Ollama API")
                    .labelNames(
                            "endpoint",
                            "status",
                            "model",
                            "raw",
                            "streaming",
                            "format",
                            "thinking",
                            "http_status",
                            "options")
                    .register();

    // Labels, in positional order: endpoint, model, raw, streaming, format, thinking,
    // http_status, options.
    private static final Histogram requestLatency =
            Histogram.build()
                    .name("ollama_api_request_duration_seconds")
                    .help("Request latency in seconds")
                    .labelNames(
                            "endpoint",
                            "model",
                            "raw",
                            "streaming",
                            "format",
                            "thinking",
                            "http_status",
                            "options")
                    .register();

    // Labels, in positional order: endpoint, model, options.
    private static final Histogram responseSize =
            Histogram.build()
                    .name("ollama_api_response_size_bytes")
                    .help("Response size in bytes")
                    .labelNames("endpoint", "model", "options")
                    .register();

    /**
     * Records metrics for a single Ollama API call: increments the request counter,
     * observes the request latency, and (when a response payload is present) observes
     * its size in characters.
     *
     * @param endpoint endpoint suffix of the API call (e.g. {@code /api/chat})
     * @param model model name used for the request; may be {@code null}
     * @param raw whether the request was sent in raw mode
     * @param thinking whether "thinking" mode was requested
     * @param streaming whether the response was streamed
     * @param options request options map; may be {@code null} or empty
     * @param format requested response format ({@code String}, {@code Map}, or other);
     *     may be {@code null}
     * @param startTime request start time in milliseconds ({@code System.currentTimeMillis()})
     * @param responseHttpStatus HTTP status code returned by the server
     * @param response response payload, or an {@link Exception} if the call failed;
     *     may be {@code null}
     */
    @SuppressWarnings("unchecked") // format is only cast after an instanceof Map check
    public static void record(
            String endpoint,
            String model,
            boolean raw,
            boolean thinking,
            boolean streaming,
            Map<String, Object> options,
            Object format,
            long startTime,
            int responseHttpStatus,
            Object response) {
        long endTime = System.currentTimeMillis();

        String httpStatus = String.valueOf(responseHttpStatus);
        // Derive the outcome from the HTTP status instead of hard-coding "success",
        // so failed calls are not counted as successful ones.
        String status =
                (responseHttpStatus >= 200 && responseHttpStatus < 300) ? "success" : "error";

        String formatString = "";
        if (format instanceof String) {
            formatString = (String) format;
        } else if (format instanceof Map) {
            formatString = mapToString((Map<String, Object>) format);
        } else if (format != null) {
            formatString = format.toString();
        }

        // Values MUST match the labelNames(...) order declared above:
        // endpoint, status, model, raw, streaming, format, thinking, http_status, options.
        requests.labels(
                        endpoint,
                        status,
                        safe(model),
                        String.valueOf(raw),
                        String.valueOf(streaming),
                        safe(formatString),
                        String.valueOf(thinking),
                        httpStatus,
                        safe(mapToString(options)))
                .inc();

        double durationSeconds = (endTime - startTime) / 1000.0;
        // Values MUST match the labelNames(...) order declared above:
        // endpoint, model, raw, streaming, format, thinking, http_status, options.
        requestLatency
                .labels(
                        endpoint,
                        safe(model),
                        String.valueOf(raw),
                        String.valueOf(streaming),
                        safe(formatString),
                        String.valueOf(thinking),
                        httpStatus,
                        safe(mapToString(options)))
                .observe(durationSeconds);

        // Record response size (only if response is a string or json-like object).
        if (response != null) {
            if (response instanceof Exception) {
                // Use the stack trace text as the payload so failed calls still record a size.
                response = Throwables.getStackTraceAsString((Throwable) response);
            }
            int size = response.toString().length();
            responseSize.labels(endpoint, safe(model), safe(mapToString(options))).observe(size);
        }
    }

    /**
     * Renders an options map as a single label value.
     *
     * @param map map to render; may be {@code null}
     * @return the map's {@code toString()} form, or {@code "none"} for null/empty maps
     */
    private static String mapToString(Map<String, Object> map) {
        if (map == null || map.isEmpty()) {
            return "none";
        }
        return map.toString();
    }

    /**
     * Normalizes null/empty label values to {@code "none"} — Prometheus label values
     * must be non-null strings.
     */
    private static String safe(String value) {
        return (value == null || value.isEmpty()) ? "none" : value;
    }
}

View File

@@ -11,6 +11,7 @@ package io.github.ollama4j.models.request;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import io.github.ollama4j.exceptions.OllamaBaseException;
import io.github.ollama4j.metrics.MetricsRecorder;
import io.github.ollama4j.models.chat.*;
import io.github.ollama4j.models.chat.OllamaChatTokenHandler;
import io.github.ollama4j.models.response.OllamaErrorResponse;
@@ -94,6 +95,7 @@ public class OllamaChatEndpointCaller extends OllamaEndpointCaller {
public OllamaChatResult callSync(OllamaChatRequest body)
throws OllamaBaseException, IOException, InterruptedException {
long startTime = System.currentTimeMillis();
HttpClient httpClient = HttpClient.newHttpClient();
URI uri = URI.create(getHost() + getEndpointSuffix());
HttpRequest.Builder requestBuilder =
@@ -133,6 +135,17 @@ public class OllamaChatEndpointCaller extends OllamaEndpointCaller {
}
}
}
MetricsRecorder.record(
getEndpointSuffix(),
body.getModel(),
false,
body.isThink(),
body.isStream(),
body.getOptions(),
body.getFormat(),
startTime,
statusCode,
responseBuffer);
if (statusCode != 200) {
LOG.error("Status code " + statusCode);
throw new OllamaBaseException(responseBuffer.toString());

View File

@@ -916,7 +916,7 @@ class OllamaAPIIntegrationTest {
assertNotNull(result);
assertNotNull(result.getResponse());
assertFalse(result.getResponse().isEmpty());
} catch (IOException | OllamaBaseException | InterruptedException e) {
} catch (OllamaBaseException e) {
fail(e);
}
}

View File

@@ -26,8 +26,6 @@ import io.github.ollama4j.models.response.OllamaResult;
import io.github.ollama4j.tools.Tools;
import io.github.ollama4j.tools.sampletools.WeatherTool;
import io.github.ollama4j.utils.OptionsBuilder;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -43,7 +41,7 @@ class TestMockedAPIs {
doNothing().when(ollamaAPI).pullModel(model);
ollamaAPI.pullModel(model);
verify(ollamaAPI, times(1)).pullModel(model);
} catch (IOException | OllamaBaseException | InterruptedException | URISyntaxException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -55,7 +53,7 @@ class TestMockedAPIs {
when(ollamaAPI.listModels()).thenReturn(new ArrayList<>());
ollamaAPI.listModels();
verify(ollamaAPI, times(1)).listModels();
} catch (IOException | OllamaBaseException | InterruptedException | URISyntaxException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -73,7 +71,7 @@ class TestMockedAPIs {
doNothing().when(ollamaAPI).createModel(customModelRequest);
ollamaAPI.createModel(customModelRequest);
verify(ollamaAPI, times(1)).createModel(customModelRequest);
} catch (IOException | OllamaBaseException | InterruptedException | URISyntaxException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -86,7 +84,7 @@ class TestMockedAPIs {
doNothing().when(ollamaAPI).deleteModel(model, true);
ollamaAPI.deleteModel(model, true);
verify(ollamaAPI, times(1)).deleteModel(model, true);
} catch (IOException | OllamaBaseException | InterruptedException | URISyntaxException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -113,7 +111,7 @@ class TestMockedAPIs {
when(ollamaAPI.getModelDetails(model)).thenReturn(new ModelDetail());
ollamaAPI.getModelDetails(model);
verify(ollamaAPI, times(1)).getModelDetails(model);
} catch (IOException | OllamaBaseException | InterruptedException | URISyntaxException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -130,7 +128,7 @@ class TestMockedAPIs {
when(ollamaAPI.embed(m)).thenReturn(new OllamaEmbedResponseModel());
ollamaAPI.embed(m);
verify(ollamaAPI, times(1)).embed(m);
} catch (IOException | OllamaBaseException | InterruptedException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -145,7 +143,7 @@ class TestMockedAPIs {
when(ollamaAPI.embed(m)).thenReturn(new OllamaEmbedResponseModel());
ollamaAPI.embed(m);
verify(ollamaAPI, times(1)).embed(m);
} catch (IOException | OllamaBaseException | InterruptedException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -160,7 +158,7 @@ class TestMockedAPIs {
.thenReturn(new OllamaEmbedResponseModel());
ollamaAPI.embed(new OllamaEmbedRequestModel(model, inputs));
verify(ollamaAPI, times(1)).embed(new OllamaEmbedRequestModel(model, inputs));
} catch (IOException | OllamaBaseException | InterruptedException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -178,7 +176,7 @@ class TestMockedAPIs {
ollamaAPI.generate(model, prompt, false, false, optionsBuilder.build(), observer);
verify(ollamaAPI, times(1))
.generate(model, prompt, false, false, optionsBuilder.build(), observer);
} catch (IOException | OllamaBaseException | InterruptedException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@@ -246,13 +244,13 @@ class TestMockedAPIs {
new OptionsBuilder().build(),
null,
null);
} catch (IOException | OllamaBaseException | InterruptedException | URISyntaxException e) {
} catch (OllamaBaseException e) {
throw new RuntimeException(e);
}
}
@Test
void testAskAsync() {
void testAskAsync() throws OllamaBaseException {
OllamaAPI ollamaAPI = Mockito.mock(OllamaAPI.class);
String model = "llama2";
String prompt = "some prompt text";