Compare commits


No commits in common. "main" and "1.1.0" have entirely different histories.
main ... 1.1.0

164 changed files with 8691 additions and 13987 deletions

.github/CODEOWNERS

@@ -1,10 +0,0 @@
# See https://docs.github.com/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
# Default owners for everything in the repo
* @amithkoujalgi
# Example for scoping ownership (uncomment and adjust as teams evolve)
# /docs/ @amithkoujalgi
# /src/ @amithkoujalgi


@@ -1,59 +0,0 @@
name: Bug report
description: File a bug report
labels: [bug]
assignees: []
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
  - type: input
    id: version
    attributes:
      label: ollama4j version
      description: e.g., 1.1.0
      placeholder: 1.1.0
    validations:
      required: true
  - type: input
    id: java
    attributes:
      label: Java version
      description: Output of `java -version`
      placeholder: 11/17/21
    validations:
      required: true
  - type: input
    id: environment
    attributes:
      label: Environment
      description: OS, build tool, Docker/Testcontainers, etc.
      placeholder: macOS 13, Maven 3.9.x, Docker 24.x
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us what you expected to happen
    validations:
      required: true
  - type: textarea
    id: steps
    attributes:
      label: Steps to reproduce
      description: Be as specific as possible
      placeholder: |
        1. Setup ...
        2. Run ...
        3. Observe ...
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant logs/stack traces
      render: shell
  - type: textarea
    id: additional
    attributes:
      label: Additional context


@@ -1,6 +0,0 @@
blank_issues_enabled: false
contact_links:
  - name: Questions / Discussions
    url: https://github.com/ollama4j/ollama4j/discussions
    about: Ask questions and discuss ideas here


@@ -1,31 +0,0 @@
name: Feature request
description: Suggest an idea or enhancement
labels: [enhancement]
assignees: []
body:
  - type: markdown
    attributes:
      value: |
        Thanks for suggesting an improvement!
  - type: textarea
    id: problem
    attributes:
      label: Is your feature request related to a problem?
      description: A clear and concise description of the problem
      placeholder: I'm frustrated when...
  - type: textarea
    id: solution
    attributes:
      label: Describe the solution you'd like
      placeholder: I'd like...
    validations:
      required: true
  - type: textarea
    id: alternatives
    attributes:
      label: Describe alternatives you've considered
  - type: textarea
    id: context
    attributes:
      label: Additional context


@@ -1,34 +0,0 @@
## Description
Describe what this PR does and why.
## Type of change
- [ ] feat: New feature
- [ ] fix: Bug fix
- [ ] docs: Documentation update
- [ ] refactor: Refactoring
- [ ] test: Tests only
- [ ] build/ci: Build or CI changes
## How has this been tested?
Explain the testing done. Include commands, screenshots, logs.
## Checklist
- [ ] I ran `pre-commit run -a` locally
- [ ] `make build` succeeds locally
- [ ] Unit/integration tests added or updated as needed
- [ ] Docs updated (README/docs site) if user-facing changes
- [ ] PR title follows Conventional Commits
## Breaking changes
List any breaking changes and migration notes.
## Related issues
Fixes #


@@ -1,34 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
#
#version: 2
#updates:
#  - package-ecosystem: "" # See documentation for possible values
#    directory: "/" # Location of package manifests
#    schedule:
#      interval: "weekly"
version: 2
updates:
  - package-ecosystem: "maven"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5
    labels: ["dependencies"]
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5
    labels: ["dependencies"]
  - package-ecosystem: "npm"
    directory: "/docs"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5
    labels: ["dependencies"]
#


@@ -20,17 +20,13 @@ jobs:
     permissions:
       contents: read
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
     steps:
-      - uses: actions/checkout@v5
-      - name: Set up JDK 21
-        uses: actions/setup-java@v5
+      - uses: actions/checkout@v3
+      - name: Set up JDK 11
+        uses: actions/setup-java@v3
         with:
-          java-version: '21'
-          distribution: 'oracle'
+          java-version: '11'
+          distribution: 'adopt-hotspot'
           server-id: github
           settings-path: ${{ github.workspace }}
@@ -50,9 +46,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v3
       - name: Use Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v3
         with:
           node-version: '20.x'
       - run: cd docs && npm ci


@@ -1,44 +0,0 @@
name: CodeQL
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  schedule:
    - cron: '0 3 * * 1'
jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write
    strategy:
      fail-fast: false
      matrix:
        language: [ 'java', 'javascript' ]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Set up JDK
        if: matrix.language == 'java'
        uses: actions/setup-java@v5
        with:
          distribution: oracle
          java-version: '21'
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
      - name: Autobuild
        uses: github/codeql-action/autobuild@v3
      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3


@@ -13,12 +13,12 @@ jobs:
       packages: write
     steps:
-      - uses: actions/checkout@v5
-      - name: Set up JDK 21
-        uses: actions/setup-java@v5
+      - uses: actions/checkout@v3
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
         with:
-          java-version: '21'
-          distribution: 'oracle'
+          java-version: '17'
+          distribution: 'temurin'
           server-id: github
           settings-path: ${{ github.workspace }}


@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
      - name: Mark stale issues
-       uses: actions/stale@v10
+       uses: actions/stale@v8
        with:
          repo-token: ${{ github.token }}
          days-before-stale: 15


@@ -24,13 +24,13 @@ jobs:
       packages: write
     steps:
-      - uses: actions/checkout@v5
-      - name: Set up JDK 21
-        uses: actions/setup-java@v5
+      - uses: actions/checkout@v3
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
         with:
-          java-version: '21'
-          distribution: 'oracle'
+          java-version: '17'
+          distribution: 'temurin'
           server-id: github # Value of the distributionManagement/repository/id field of the pom.xml
           settings-path: ${{ github.workspace }} # location for the settings.xml file


@@ -1,30 +0,0 @@
name: Pre-commit Check on PR
on:
  pull_request:
    types: [opened, reopened, synchronize]
    branches:
      - main
#on:
#  pull_request:
#    branches: [ main ]
#  push:
#    branches: [ main ]
jobs:
  run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: '3.x'
      - name: Install pre-commit
        run: |
          python -m pip install --upgrade pip
          pip install pre-commit
#      - name: Run pre-commit
#        run: |
#          pre-commit run --all-files --show-diff-on-failure


@@ -29,18 +29,18 @@ jobs:
       name: github-pages
       url: ${{ steps.deployment.outputs.page_url }}
     steps:
-      - uses: actions/checkout@v5
-      - name: Set up JDK 21
-        uses: actions/setup-java@v5
+      - uses: actions/checkout@v3
+      - name: Set up JDK 11
+        uses: actions/setup-java@v3
         with:
-          java-version: '21'
-          distribution: 'oracle'
+          java-version: '11'
+          distribution: 'adopt-hotspot'
           server-id: github # Value of the distributionManagement/repository/id field of the pom.xml
           settings-path: ${{ github.workspace }} # location for the settings.xml file
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v4
       - name: Use Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v3
        with:
          node-version: '20.x'
      - run: cd docs && npm ci
@@ -57,7 +57,7 @@ jobs:
        run: mvn --file pom.xml -U clean package && cp -r ./target/apidocs/. ./docs/build/apidocs
      - name: Doxygen Action
-       uses: mattnotmitt/doxygen-action@v1.12.0
+       uses: mattnotmitt/doxygen-action@v1.1.0
        with:
          doxyfile-path: "./Doxyfile"
          working-directory: "."
@@ -65,7 +65,7 @@ jobs:
      - name: Setup Pages
        uses: actions/configure-pages@v5
      - name: Upload artifact
-       uses: actions/upload-pages-artifact@v4
+       uses: actions/upload-pages-artifact@v3
        with:
          # Upload entire repository
          path: './docs/build/.'


@@ -28,7 +28,7 @@ jobs:
     steps:
       - name: Checkout target branch
-        uses: actions/checkout@v5
+        uses: actions/checkout@v3
         with:
           ref: ${{ github.event.inputs.branch }}
@@ -36,19 +36,19 @@
         run: |
           curl -fsSL https://ollama.com/install.sh | sh
-      - name: Set up JDK 21
-        uses: actions/setup-java@v5
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
         with:
-          java-version: '21'
-          distribution: 'oracle'
+          java-version: '17'
+          distribution: 'temurin'
           server-id: github
           settings-path: ${{ github.workspace }}
       - name: Run unit tests
-        run: make unit-tests
+        run: mvn clean test -Punit-tests
       - name: Run integration tests
-        run: make integration-tests-basic
+        run: mvn clean verify -Pintegration-tests
         env:
           USE_EXTERNAL_OLLAMA_HOST: "true"
           OLLAMA_HOST: "http://localhost:11434"


@@ -1,33 +0,0 @@
name: Mark stale issues and PRs
on:
  schedule:
    - cron: '0 2 * * *'
permissions:
  issues: write
  pull-requests: write
jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v10
        with:
          days-before-stale: 60
          days-before-close: 14
          stale-issue-label: 'stale'
          stale-pr-label: 'stale'
          exempt-issue-labels: 'pinned,security'
          exempt-pr-labels: 'pinned,security'
          stale-issue-message: >
            This issue has been automatically marked as stale because it has not had
            recent activity. It will be closed if no further activity occurs.
          close-issue-message: >
            Closing this stale issue. Feel free to reopen if this is still relevant.
          stale-pr-message: >
            This pull request has been automatically marked as stale due to inactivity.
            It will be closed if no further activity occurs.
          close-pr-message: >
            Closing this stale pull request. Please reopen when you're ready to continue.


@@ -21,19 +21,11 @@ repos:
   # for commit message formatting
   - repo: https://github.com/commitizen-tools/commitizen
-    rev: v4.9.1
+    rev: v4.8.3
     hooks:
       - id: commitizen
         stages: [commit-msg]
-  - repo: local
-    hooks:
-      - id: format-code
-        name: Format Code
-        entry: make apply-formatting
-        language: system
-        always_run: true
 #  # for java code quality
 #  - repo: https://github.com/gherynos/pre-commit-java
 #    rev: v0.6.10


@@ -1,9 +0,0 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
  - family-names: "Koujalgi"
    given-names: "Amith"
title: "Ollama4j: A Java Library (Wrapper/Binding) for Ollama Server"
version: "1.1.0"
date-released: 2023-12-19
url: "https://github.com/ollama4j/ollama4j"


@@ -1,125 +0,0 @@
## Contributing to Ollama4j
Thanks for your interest in contributing! This guide explains how to set up your environment, make changes, and submit pull requests.
### Code of Conduct
By participating, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
### Quick Start
Prerequisites:
- Java 11+
- Maven 3.8+
- Docker (required for integration tests)
- Make (for convenience targets)
- pre-commit (for Git hooks)
Setup:
```bash
# 1) Fork the repo and clone your fork
git clone https://github.com/<your-username>/ollama4j.git
cd ollama4j
# 2) Install and enable git hooks
pre-commit install --hook-type pre-commit --hook-type commit-msg
# 3) Prepare dev environment (installs required dev deps/tools if needed)
make dev
```
Build and test:
```bash
# Build
make build
# Run unit tests
make unit-tests
# Run integration tests (requires Docker running)
make integration-tests
```
If you prefer raw Maven:
```bash
# Unit tests profile
mvn -P unit-tests clean test
# Integration tests profile (Docker required)
mvn -P integration-tests -DskipUnitTests=true clean verify
```
### Commit Style
We use Conventional Commits. Commit messages and PR titles should follow:
```
<type>(optional scope): <short summary>
[optional body]
[optional footer(s)]
```
Common types: `feat`, `fix`, `docs`, `refactor`, `test`, `build`, `chore`.
Commit message formatting is enforced via `commitizen` through `pre-commit` hooks.
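For example, a message in this style might look like (an illustrative, made-up change):
```
feat(generate): support streaming responses in the generate API

Adds an optional token-level stream handler so callers can render partial
output as it arrives.
```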
### Pre-commit Hooks
Before pushing, run:
```bash
pre-commit run -a
```
Hooks will check for merge conflicts, large files, YAML/XML/JSON validity, line endings, and basic formatting. Fix reported issues before opening a PR.
### Coding Guidelines
- Target Java 11+; match existing style and formatting.
- Prefer clear, descriptive names over abbreviations.
- Add Javadoc for public APIs and non-obvious logic.
- Include meaningful tests for new features and bug fixes.
- Avoid introducing new dependencies without discussion.
### Tests
- Unit tests: place under `src/test/java/**/unittests/` (see the sketch below for a minimal example).
- Integration tests: place under `src/test/java/**/integrationtests/` (uses Testcontainers; ensure Docker is running).
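A minimal unit-test sketch, assuming JUnit 5 on the test classpath and the `OptionsBuilder` API shown in the options docs (the class name and assertion are illustrative):
```java
package io.github.ollama4j.unittests;

import static org.junit.jupiter.api.Assertions.assertNotNull;

import io.github.ollama4j.utils.Options;
import io.github.ollama4j.utils.OptionsBuilder;
import org.junit.jupiter.api.Test;

// Illustrative placement: src/test/java/io/github/ollama4j/unittests/OptionsBuilderTest.java
class OptionsBuilderTest {

    @Test
    void buildsOptionsWithCustomTemperature() {
        // Build a type-safe Options object and check it materializes
        Options options = new OptionsBuilder().setTemperature(1.5f).build();
        assertNotNull(options);
    }
}
```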
### Documentation
- Update `README.md`, Javadoc, and `docs/` when you change public APIs or user-facing behavior.
- Add example snippets where useful. Keep API references consistent with the website content when applicable.
### Pull Requests
Before opening a PR:
- Ensure `make build` and all tests pass locally.
- Run `pre-commit run -a` and fix any issues.
- Keep PRs focused and reasonably small. Link related issues (e.g., "Closes #123").
- Describe the change, rationale, and any trade-offs in the PR description.
Review process:
- Maintainers will review for correctness, scope, tests, and docs.
- You may be asked to iterate; please be responsive to comments.
### Security
If you discover a security issue, please do not open a public issue. Instead, email the maintainer at `koujalgi.amith@gmail.com` with details.
### License
By contributing, you agree that your contributions will be licensed under the project's [MIT License](LICENSE).
### Questions and Discussion
Have questions or ideas? Open a GitHub Discussion or issue. We welcome feedback and proposals!


@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2023 Amith Koujalgi and contributors
+Copyright (c) 2023 Amith Koujalgi
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal


@@ -2,74 +2,41 @@ dev:
 	@echo "Setting up dev environment..."
 	@command -v pre-commit >/dev/null 2>&1 || { echo "Error: pre-commit is not installed. Please install it first."; exit 1; }
 	@command -v docker >/dev/null 2>&1 || { echo "Error: docker is not installed. Please install it first."; exit 1; }
-	@pre-commit install
-	@pre-commit autoupdate
-	@pre-commit install --install-hooks
+	pre-commit install
+	pre-commit autoupdate
+	pre-commit install --install-hooks
-check-formatting:
-	@echo "\033[0;34mChecking code formatting...\033[0m"
-	@mvn spotless:check
-apply-formatting:
-	@echo "\033[0;32mApplying code formatting...\033[0m"
-	@mvn spotless:apply
-build: apply-formatting
-	@echo "\033[0;34mBuilding project (GPG skipped)...\033[0m"
-	@mvn -B clean install -Dgpg.skip=true -Dmaven.javadoc.skip=true
-full-build: apply-formatting
-	@echo "\033[0;34mPerforming full build...\033[0m"
-	@mvn -B clean install
-unit-tests: apply-formatting
-	@echo "\033[0;34mRunning unit tests...\033[0m"
-	@mvn clean test -Punit-tests
-integration-tests-all: apply-formatting
-	@echo "\033[0;34mRunning integration tests (local - all)...\033[0m"
-	@export USE_EXTERNAL_OLLAMA_HOST=false && mvn clean verify -Pintegration-tests
-integration-tests-basic: apply-formatting
-	@echo "\033[0;34mRunning integration tests (local - basic)...\033[0m"
-	@export USE_EXTERNAL_OLLAMA_HOST=false && mvn clean verify -Pintegration-tests -Dit.test=WithAuth
-integration-tests-remote: apply-formatting
-	@echo "\033[0;34mRunning integration tests (remote - all)...\033[0m"
-	@export USE_EXTERNAL_OLLAMA_HOST=true && export OLLAMA_HOST=http://192.168.29.229:11434 && mvn clean verify -Pintegration-tests -Dgpg.skip=true
+build:
+	mvn -B clean install -Dgpg.skip=true
+full-build:
+	mvn -B clean install
+unit-tests:
+	mvn clean test -Punit-tests
+integration-tests:
+	export USE_EXTERNAL_OLLAMA_HOST=false && mvn clean verify -Pintegration-tests
+integration-tests-remote:
+	export USE_EXTERNAL_OLLAMA_HOST=true && export OLLAMA_HOST=http://192.168.29.223:11434 && mvn clean verify -Pintegration-tests -Dgpg.skip=true
 doxygen:
-	@echo "\033[0;34mGenerating documentation with Doxygen...\033[0m"
-	@doxygen Doxyfile
+	doxygen Doxyfile
-javadoc:
-	@echo "\033[0;34mGenerating Javadocs into '$(javadocfolder)'...\033[0m"
-	@mvn clean javadoc:javadoc
-	@if [ -f "target/reports/apidocs/index.html" ]; then \
-		echo "\033[0;32mJavadocs generated in target/reports/apidocs/index.html\033[0m"; \
-	else \
-		echo "\033[0;31mFailed to generate Javadocs in target/reports/apidocs\033[0m"; \
-		exit 1; \
-	fi
 list-releases:
-	@echo "\033[0;34mListing latest releases...\033[0m"
-	@curl 'https://central.sonatype.com/api/internal/browse/component/versions?sortField=normalizedVersion&sortDirection=desc&page=0&size=20&filter=namespace%3Aio.github.ollama4j%2Cname%3Aollama4j' \
+	curl 'https://central.sonatype.com/api/internal/browse/component/versions?sortField=normalizedVersion&sortDirection=desc&page=0&size=20&filter=namespace%3Aio.github.ollama4j%2Cname%3Aollama4j' \
 		--compressed \
 		--silent | jq -r '.components[].version'
 docs-build:
-	@echo "\033[0;34mBuilding documentation site...\033[0m"
-	@cd ./docs && npm ci --no-audit --fund=false && npm run build
+	npm i --prefix docs && npm run build --prefix docs
 docs-serve:
-	@echo "\033[0;34mServing documentation site...\033[0m"
-	@cd ./docs && npm install && npm run start
+	npm i --prefix docs && npm run start --prefix docs
 start-cpu:
-	@echo "\033[0;34mStarting Ollama (CPU mode)...\033[0m"
-	@docker run -it -v ~/ollama:/root/.ollama -p 11434:11434 ollama/ollama
+	docker run -it -v ~/ollama:/root/.ollama -p 11434:11434 ollama/ollama
 start-gpu:
-	@echo "\033[0;34mStarting Ollama (GPU mode)...\033[0m"
-	@docker run -it --gpus=all -v ~/ollama:/root/.ollama -p 11434:11434 ollama/ollama
+	docker run -it --gpus=all -v ~/ollama:/root/.ollama -p 11434:11434 ollama/ollama

README.md

@@ -1,32 +1,26 @@
-<div align="center">
-<img src='https://raw.githubusercontent.com/ollama4j/ollama4j/refs/heads/main/ollama4j-new.jpeg' width='200' alt="ollama4j-icon">
 ### Ollama4j
-</div>
+<p align="center">
+  <img src='https://raw.githubusercontent.com/ollama4j/ollama4j/65a9d526150da8fcd98e2af6a164f055572bf722/ollama4j.jpeg' width='100' alt="ollama4j-icon">
+</p>
 <div align="center">
 A Java library (wrapper/binding) for Ollama server.
-_Find more details on the **[website](https://ollama4j.github.io/ollama4j/)**._
+Find more details on the [website](https://ollama4j.github.io/ollama4j/).
 ![GitHub stars](https://img.shields.io/github/stars/ollama4j/ollama4j)
 ![GitHub forks](https://img.shields.io/github/forks/ollama4j/ollama4j)
-![GitHub watchers](https://img.shields.io/github/watchers/ollama4j/ollama4j)
 ![Contributors](https://img.shields.io/github/contributors/ollama4j/ollama4j?style=social)
-![GitHub last commit](https://img.shields.io/github/last-commit/ollama4j/ollama4j?color=green)
 ![GitHub License](https://img.shields.io/github/license/ollama4j/ollama4j)
-[![Run Unit and Integration Tests](https://github.com/ollama4j/ollama4j/actions/workflows/run-tests.yml/badge.svg)](https://github.com/ollama4j/ollama4j/actions/workflows/run-tests.yml)
-![Sonar Quality Gate](https://img.shields.io/sonar/quality_gate/ollama4j_ollama4j?server=https%3A%2F%2Fsonarcloud.io&label=Sonar%20Quality%20Gate)
-[![codecov](https://codecov.io/gh/ollama4j/ollama4j/graph/badge.svg?token=U0TE7BGP8L)](https://codecov.io/gh/ollama4j/ollama4j)
-</div>
 [//]: # (![GitHub repo size]&#40;https://img.shields.io/github/repo-size/ollama4j/ollama4j&#41;)
 [//]: # (![GitHub top language]&#40;https://img.shields.io/github/languages/top/ollama4j/ollama4j&#41;)
 [//]: # (![JitPack Downloads This Month Badge]&#40;https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fjitpack.io%2Fapi%2Fdownloads%2Fio.github.ollama4j%2Follama4j&query=%24.month&label=JitPack%20Downloads%20-%20This%20Month&#41;)
 [//]: # (![JitPack Downloads This Week Badge]&#40;https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fjitpack.io%2Fapi%2Fdownloads%2Fio.github.ollama4j%2Follama4j&query=%24.week&label=JitPack%20Downloads%20-%20This%20Week&#41;)
@@ -35,60 +29,32 @@ _Find more details on the **[website](https://ollama4j.github.io/ollama4j/)**._
 [//]: # (![GitHub Downloads &#40;all assets, all releases&#41;]&#40;https://img.shields.io/github/downloads/ollama4j/ollama4j/total?label=GitHub%20Package%20Downloads&#41;)
+![GitHub last commit](https://img.shields.io/github/last-commit/ollama4j/ollama4j?color=green)
+[![codecov](https://codecov.io/gh/ollama4j/ollama4j/graph/badge.svg?token=U0TE7BGP8L)](https://codecov.io/gh/ollama4j/ollama4j)
+[![Run Unit and Integration Tests](https://github.com/ollama4j/ollama4j/actions/workflows/run-tests.yml/badge.svg)](https://github.com/ollama4j/ollama4j/actions/workflows/run-tests.yml)
+![Build Status](https://github.com/ollama4j/ollama4j/actions/workflows/maven-publish.yml/badge.svg)
+</div>
 [//]: # (![Hits]&#40;https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Follama4j%2Follama4j&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false&#41;)
 [//]: # (![GitHub language count]&#40;https://img.shields.io/github/languages/count/ollama4j/ollama4j&#41;)
 ## Table of Contents
-- [Capabilities](#capabilities)
 - [How does it work?](#how-does-it-work)
 - [Requirements](#requirements)
-- [Usage](#usage)
-  - [For Maven](#for-maven)
-    - [Using Maven Central](#using-maven-central)
-    - [Using GitHub's Maven Package Repository](#using-githubs-maven-package-repository)
-  - [For Gradle](#for-gradle)
-- [API Spec](#api-spec)
+- [Installation](#installation)
+- [API Spec](https://ollama4j.github.io/ollama4j/category/apis---model-management)
 - [Examples](#examples)
+- [Javadoc](https://ollama4j.github.io/ollama4j/apidocs/)
 - [Development](#development)
-  - [Setup dev environment](#setup-dev-environment)
-  - [Build](#build)
-  - [Run unit tests](#run-unit-tests)
-  - [Run integration tests](#run-integration-tests)
-- [Releases](#releases)
-- [Get Involved](#get-involved)
-- [Who's using Ollama4j?](#whos-using-ollama4j)
-- [Growth](#growth)
+- [Contributions](#get-involved)
 - [References](#references)
-- [Credits](#credits)
-- [Appreciate the work?](#appreciate-the-work)
-## Capabilities
-- **Text generation**: Single-turn `generate` with optional streaming and advanced options
-- **Chat**: Multi-turn chat with conversation history and roles
-- **Tool/function calling**: Built-in tool invocation via annotations and tool specs
-- **Reasoning/thinking modes**: Generate and chat with “thinking” outputs where supported
-- **Image inputs (multimodal)**: Generate with images as inputs where models support vision
-- **Embeddings**: Create vector embeddings for text
-- **Async generation**: Fire-and-forget style generation APIs
-- **Custom roles**: Define and use custom chat roles
-- **Model management**: List, pull, create, delete, and get model details
-- **Connectivity utilities**: Server `ping` and process status (`ps`)
-- **Authentication**: Basic auth and bearer token support
-- **Options builder**: Type-safe builder for model parameters and request options
-- **Timeouts**: Configure connect/read/write timeouts
-- **Logging**: Built-in logging hooks for requests and responses
-- **Metrics & Monitoring** 🆕: Built-in Prometheus metrics export for real-time monitoring of requests, model usage, and
-  performance. *(Beta feature feedback/contributions welcome!)* -
-  Checkout [ollama4j-examples](https://github.com/ollama4j/ollama4j-examples) repository for details.
-<div align="center">
-<img src='metrics.png' width='100%' alt="ollama4j-icon">
-</div>
-## How does it work?
+#### How does it work?
 ```mermaid
 flowchart LR
@@ -102,16 +68,16 @@ _Find more details on the **[website](https://ollama4j.github.io/ollama4j/)**._
   end
 ```
-## Requirements
-![Java](https://img.shields.io/badge/Java-11_+-green.svg?style=for-the-badge&labelColor=gray&label=Java&color=orange)
+#### Requirements
+<p align="center">
+  <img src="https://img.shields.io/badge/Java-11%2B-green.svg?style=for-the-badge&labelColor=gray&label=Java&color=orange" alt="Java"/>
 <a href="https://ollama.com/" target="_blank">
-  <img src="https://img.shields.io/badge/Ollama-0.11.10+-blue.svg?style=for-the-badge&labelColor=gray&label=Ollama&color=blue" alt="Ollama"/>
+  <img src="https://img.shields.io/badge/v0.3.0-green.svg?style=for-the-badge&labelColor=gray&label=Ollama&color=blue" alt=""/>
 </a>
+</p>
-## Usage
+## Installation
 > [!NOTE]
 > We are now publishing the artifacts to both Maven Central and GitHub package repositories.
@@ -136,7 +102,7 @@ In your Maven project, add this dependency:
 <dependency>
     <groupId>io.github.ollama4j</groupId>
     <artifactId>ollama4j</artifactId>
-    <version>1.1.0</version>
+    <version>1.0.100</version>
 </dependency>
 ```
@@ -192,7 +158,7 @@ In your Maven project, add this dependency:
 <dependency>
     <groupId>io.github.ollama4j</groupId>
     <artifactId>ollama4j</artifactId>
-    <version>1.1.0</version>
+    <version>1.0.100</version>
 </dependency>
 ```
@@ -202,7 +168,7 @@ In your Maven project, add this dependency:
 ```groovy
 dependencies {
-    implementation 'io.github.ollama4j:ollama4j:1.1.0'
+    implementation 'io.github.ollama4j:ollama4j:1.0.100'
 }
 ```
@@ -220,17 +186,12 @@ dependencies {
 [lib-shield]: https://img.shields.io/badge/ollama4j-get_latest_version-blue.svg?style=just-the-message&labelColor=gray
-### API Spec
+#### API Spec
 > [!TIP]
 > Find the full API specifications on the [website](https://ollama4j.github.io/ollama4j/).
-## Examples
-For practical examples and usage patterns of the Ollama4j library, check out
-the [ollama4j-examples](https://github.com/ollama4j/ollama4j-examples) repository.
-## Development
+### Development
 Make sure you have `pre-commit` installed.
@@ -249,9 +210,7 @@ pip install pre-commit
 #### Setup dev environment
 > **Note**
-> If you're on Windows, install [Chocolatey Package Manager for Windows](https://chocolatey.org/install) and then
-> install `make` by running `choco install make`. Just a little tip - run the command with administrator privileges if
-> installation faiils.
+> If you're on Windows, install [Chocolatey Package Manager for Windows](https://chocolatey.org/install) and then install `make` by running `choco install make`. Just a little tip - run the command with administrator privileges if installation faiils.
 ```shell
 make dev
@@ -282,24 +241,14 @@ make integration-tests
 Newer artifacts are published via GitHub Actions CI workflow when a new release is created from `main` branch.
-## Get Involved
-<div align="center">
-<a href="">![Open Issues](https://img.shields.io/github/issues-raw/ollama4j/ollama4j)</a>
-<a href="">![Closed Issues](https://img.shields.io/github/issues-closed-raw/ollama4j/ollama4j)</a>
-<a href="">![Open PRs](https://img.shields.io/github/issues-pr-raw/ollama4j/ollama4j)</a>
-<a href="">![Closed PRs](https://img.shields.io/github/issues-pr-closed-raw/ollama4j/ollama4j)</a>
-<a href="">![Discussions](https://img.shields.io/github/discussions/ollama4j/ollama4j)</a>
-</div>
-Contributions are most welcome! Whether it's reporting a bug, proposing an enhancement, or helping
-with code - any sort of contribution is much appreciated.
-<div style="font-size: 15px; font-weight: bold; padding-top: 10px; padding-bottom: 10px; border: 1px solid" align="center">
-If you like or are use this project, please give us a ⭐. It's a free way to show your support.
-</div>
+## Examples
+The `ollama4j-examples` repository contains examples for using the Ollama4j library. You can explore
+it [here](https://github.com/ollama4j/ollama4j-examples).
+## ⭐ Give us a Star!
+If you like or are using this project to build your own, please give us a star. It's a free way to show your support.
 ## Who's using Ollama4j?
@@ -316,18 +265,23 @@ If you like or are use this project, please give us a ⭐. It's a free way to sh
 | 9 | moqui-wechat | A moqui-wechat component | [GitHub](https://github.com/heguangyong/moqui-wechat) |
 | 10 | B4X | A set of simple and powerful RAD tool for Desktop and Server development | [Website](https://www.b4x.com/android/forum/threads/ollama4j-library-pnd_ollama4j-your-local-offline-llm-like-chatgpt.165003/) |
 | 11 | Research Article | Article: `Large language model based mutations in genetic improvement` - published on National Library of Medicine (National Center for Biotechnology Information) | [Website](https://pmc.ncbi.nlm.nih.gov/articles/PMC11750896/) |
-| 12 | renaime | A LLaVa powered tool that automatically renames image files having messy file names. | [Website](https://devpost.com/software/renaime) |
-## Growth
-[![Star History Chart](https://api.star-history.com/svg?repos=ollama4j/ollama4j&type=Date)](https://star-history.com/#ollama4j/ollama4j&Date)
+## Traction
+## Get Involved
+<div align="center">
+<a href="">![Open Issues](https://img.shields.io/github/issues-raw/ollama4j/ollama4j)</a>
+<a href="">![Closed Issues](https://img.shields.io/github/issues-closed-raw/ollama4j/ollama4j)</a>
+<a href="">![Open PRs](https://img.shields.io/github/issues-pr-raw/ollama4j/ollama4j)</a>
+<a href="">![Closed PRs](https://img.shields.io/github/issues-pr-closed-raw/ollama4j/ollama4j)</a>
+<a href="">![Discussions](https://img.shields.io/github/discussions/ollama4j/ollama4j)</a>
+</div>
+<p align="center">
+  <a href="https://star-history.com/#ollama4j/ollama4j&Date" target="_blank" rel="noopener noreferrer">
+    <img
+      src="https://api.star-history.com/svg?repos=ollama4j/ollama4j&type=Date"
+      alt="Star History Chart"
+    />
+  </a>
+</p>
 [//]: # (![GitHub Issues or Pull Requests]&#40;https://img.shields.io/github/issues-raw/ollama4j/ollama4j&#41;)
@@ -339,6 +293,27 @@ If you like or are use this project, please give us a ⭐. It's a free way to sh
 [//]: # (![GitHub Discussions]&#40;https://img.shields.io/github/discussions/ollama4j/ollama4j&#41;)
+Contributions are most welcome! Whether it's reporting a bug, proposing an enhancement, or helping
+with code - any sort
+of contribution is much appreciated.
+## 🏷️ License and Citation
+The code is available under [MIT License](./LICENSE).
+If you find this project helpful in your research, please cite this work at
+```
+@misc{ollama4j2024,
+    author = {Amith Koujalgi},
+    title = {Ollama4j: A Java Library (Wrapper/Binding) for Ollama Server},
+    year = {2024},
+    month = {January},
+    url = {https://github.com/ollama4j/ollama4j}
+}
+```
 ### References
 - [Ollama REST APIs](https://github.com/jmorganca/ollama/blob/main/docs/api.md)


@@ -1,39 +0,0 @@
## Security Policy
### Supported Versions
We aim to support the latest released version of `ollama4j` and the most recent minor version prior to it. Older versions may receive fixes on a best-effort basis.
### Reporting a Vulnerability
Please do not open public GitHub issues for security vulnerabilities.
Instead, email the maintainer at:
```
koujalgi.amith@gmail.com
```
Include as much detail as possible:
- A clear description of the issue and impact
- Steps to reproduce or proof-of-concept
- Affected version(s) and environment
- Any suggested mitigations or patches
You should receive an acknowledgement within 72 hours. We will work with you to validate the issue, determine severity, and prepare a fix.
### Disclosure
We follow a responsible disclosure process:
1. Receive and validate report privately.
2. Develop and test a fix.
3. Coordinate a release that includes the fix.
4. Publicly credit the reporter (if desired) in release notes.
### GPG Signatures
Releases may be signed as part of our CI pipeline. If verification fails or you have concerns about release integrity, please contact us via the email above.
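As an illustration, a detached-signature check with GnuPG might look like this (the artifact and signature filenames are hypothetical, not actual release assets):
```shell
# Verify the artifact against its detached .asc signature
gpg --verify ollama4j-1.1.0.jar.asc ollama4j-1.1.0.jar
```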


@@ -1,186 +0,0 @@
# Prometheus Metrics Integration
Ollama4j now includes comprehensive Prometheus metrics collection to help you monitor and observe your Ollama API usage. This feature allows you to track request counts, response times, model usage, and other operational metrics.
## Features
The metrics integration provides the following metrics:
- **Request Metrics**: Total requests, duration histograms, and response time summaries by endpoint
- **Model Usage**: Model-specific usage statistics and response times
- **Token Generation**: Token count tracking per model
- **Error Tracking**: Error counts by type and endpoint
- **Active Connections**: Current number of active API connections
## Quick Start
### 1. Enable Metrics Collection
```java
import io.github.ollama4j.Ollama;
// Create API instance with metrics enabled
Ollama ollama = new Ollama();
ollama.setMetricsEnabled(true);
```
### 2. Start Metrics Server
```java
import io.prometheus.client.exporter.HTTPServer;
// Start Prometheus metrics HTTP server on port 8080
HTTPServer metricsServer = new HTTPServer(8080);
System.out.println("Metrics available at: http://localhost:8080/metrics");
```
### 3. Use the API (Metrics are automatically collected)
```java
// All API calls are automatically instrumented
boolean isReachable = ollama.ping();
Map<String, Object> format = new HashMap<>();
format.put("type", "json");
OllamaResult result = ollama.generateWithFormat(
"llama2",
"Generate a JSON object",
format
);
```
## Available Metrics
### Request Metrics
- `ollama_api_requests_total` - Total number of API requests by endpoint, method, and status
- `ollama_api_request_duration_seconds` - Request duration histogram by endpoint and method
- `ollama_api_response_time_seconds` - Response time summary with percentiles
### Model Metrics
- `ollama_model_usage_total` - Model usage count by model name and operation
- `ollama_model_response_time_seconds` - Model response time histogram
- `ollama_tokens_generated_total` - Total tokens generated by model
### System Metrics
- `ollama_api_active_connections` - Current number of active connections
- `ollama_api_errors_total` - Error count by endpoint and error type
## Example Metrics Output
```
# HELP ollama_api_requests_total Total number of Ollama API requests
# TYPE ollama_api_requests_total counter
ollama_api_requests_total{endpoint="/api/generate",method="POST",status="success"} 5.0
ollama_api_requests_total{endpoint="/api/embed",method="POST",status="success"} 3.0
# HELP ollama_api_request_duration_seconds Duration of Ollama API requests in seconds
# TYPE ollama_api_request_duration_seconds histogram
ollama_api_request_duration_seconds_bucket{endpoint="/api/generate",method="POST",le="0.1"} 0.0
ollama_api_request_duration_seconds_bucket{endpoint="/api/generate",method="POST",le="0.5"} 2.0
ollama_api_request_duration_seconds_bucket{endpoint="/api/generate",method="POST",le="1.0"} 4.0
ollama_api_request_duration_seconds_bucket{endpoint="/api/generate",method="POST",le="+Inf"} 5.0
ollama_api_request_duration_seconds_sum{endpoint="/api/generate",method="POST"} 2.5
ollama_api_request_duration_seconds_count{endpoint="/api/generate",method="POST"} 5.0
# HELP ollama_model_usage_total Total number of model usage requests
# TYPE ollama_model_usage_total counter
ollama_model_usage_total{model_name="llama2",operation="generate_with_format"} 5.0
ollama_model_usage_total{model_name="llama2",operation="embed"} 3.0
# HELP ollama_tokens_generated_total Total number of tokens generated
# TYPE ollama_tokens_generated_total counter
ollama_tokens_generated_total{model_name="llama2"} 150.0
```
## Configuration
### Enable/Disable Metrics
```java
Ollama ollama = new Ollama();
// Enable metrics collection
ollama.setMetricsEnabled(true);
// Disable metrics collection (default)
ollama.setMetricsEnabled(false);
```
### Custom Metrics Server
```java
import io.prometheus.client.exporter.HTTPServer;
// Start on custom port
HTTPServer metricsServer = new HTTPServer(9090);
// Or bind to a custom host and port
HTTPServer boundMetricsServer = new HTTPServer("0.0.0.0", 9090);
```
## Integration with Prometheus
### Prometheus Configuration
Add this to your `prometheus.yml`:
```yaml
scrape_configs:
- job_name: 'ollama4j'
static_configs:
- targets: ['localhost:8080']
scrape_interval: 15s
```
### Grafana Dashboards
You can create Grafana dashboards using the metrics. Some useful queries:
- **Request Rate**: `rate(ollama_api_requests_total[5m])`
- **Average Response Time**: `rate(ollama_api_request_duration_seconds_sum[5m]) / rate(ollama_api_request_duration_seconds_count[5m])`
- **Error Rate**: `rate(ollama_api_requests_total{status="error"}[5m]) / rate(ollama_api_requests_total[5m])`
- **Model Usage**: `rate(ollama_model_usage_total[5m])`
- **Token Generation Rate**: `rate(ollama_tokens_generated_total[5m])`
## Performance Considerations
- Metrics collection adds minimal overhead (~1-2% in most cases)
- Metrics are collected asynchronously and don't block API calls
- You can disable metrics in production if needed: `ollama.setMetricsEnabled(false)`
- The metrics server uses minimal resources
## Troubleshooting
### Metrics Not Appearing
1. Ensure metrics are enabled: `ollama.setMetricsEnabled(true)`
2. Check that the metrics server is running: `http://localhost:8080/metrics` (see the curl check below)
3. Verify API calls are being made (metrics only appear after API usage)
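A quick command-line version of step 2, using plain `curl` against the endpoint configured earlier in this guide:
```shell
# Prints the first exposition lines if the server is up and metrics have been recorded
curl -s http://localhost:8080/metrics | head -n 20
```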
### High Memory Usage
- Metrics accumulate over time. Consider restarting your application periodically
- Use Prometheus to scrape metrics regularly to avoid accumulation
### Custom Metrics
You can extend the metrics by accessing the Prometheus registry directly:
```java
import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.Counter;
// Create custom metrics
Counter customCounter = Counter.build()
.name("my_custom_metric_total")
.help("My custom metric")
.register();
// Use the metric
customCounter.inc();
```


@@ -23,8 +23,6 @@ applications! 🌐🚀
 - Ability to ask questions along with image files or image URLs! 🤩
 - Open-source and primed for collaborative contributions from the community!
-<!-- truncate -->
 🦙 What is Ollama?
 Ollama is an advanced AI tool that allows users to easily set up and run large language models locally (in CPU and GPU


@@ -21,8 +21,6 @@ your database using natural, conversational language — just like interacting w
 through the code, explain the key components, and show you how to set up your environment to ensure everything runs
 smoothly.
-<!-- truncate -->
 ### Overview of the Technologies Involved
 Before diving into the implementation, let's understand the core technologies we're using:
@@ -336,8 +334,8 @@ import com.couchbase.client.java.ClusterOptions;
 import com.couchbase.client.java.Scope;
 import com.couchbase.client.java.json.JsonObject;
 import com.couchbase.client.java.query.QueryResult;
-import io.github.ollama4j.Ollama;
-import io.github.ollama4j.exceptions.OllamaException;
+import io.github.ollama4j.OllamaAPI;
+import io.github.ollama4j.exceptions.OllamaBaseException;
 import io.github.ollama4j.exceptions.ToolInvocationException;
 import io.github.ollama4j.tools.OllamaToolsResult;
 import io.github.ollama4j.tools.ToolFunction;
@@ -356,7 +354,7 @@ import java.util.Map;
 public class CouchbaseToolCallingExample {
-    public static void main(String[] args) throws IOException, ToolInvocationException, OllamaException, InterruptedException {
+    public static void main(String[] args) throws IOException, ToolInvocationException, OllamaBaseException, InterruptedException {
         String connectionString = Utilities.getFromEnvVar("CB_CLUSTER_URL");
         String username = Utilities.getFromEnvVar("CB_CLUSTER_USERNAME");
         String password = Utilities.getFromEnvVar("CB_CLUSTER_PASSWORD");
@@ -372,17 +370,18 @@ public class CouchbaseToolCallingExample {
         String host = Utilities.getFromConfig("host");
         String modelName = Utilities.getFromConfig("tools_model_mistral");
-        Ollama ollama = new Ollama(host);
-        ollama.setRequestTimeoutSeconds(60);
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
+        ollamaAPI.setVerbose(false);
+        ollamaAPI.setRequestTimeoutSeconds(60);
         Tools.ToolSpecification callSignFinderToolSpec = getCallSignFinderToolSpec(cluster, bucketName);
         Tools.ToolSpecification callSignUpdaterToolSpec = getCallSignUpdaterToolSpec(cluster, bucketName);
-        ollama.registerTool(callSignFinderToolSpec);
-        ollama.registerTool(callSignUpdaterToolSpec);
+        ollamaAPI.registerTool(callSignFinderToolSpec);
+        ollamaAPI.registerTool(callSignUpdaterToolSpec);
         String prompt1 = "What is the call-sign of Astraeus?";
-        for (OllamaToolsResult.ToolResult r : ollama.generateWithTools(modelName, new Tools.PromptBuilder()
+        for (OllamaToolsResult.ToolResult r : ollamaAPI.generateWithTools(modelName, new Tools.PromptBuilder()
                 .withToolSpecification(callSignFinderToolSpec)
                 .withPrompt(prompt1)
                 .build(), new OptionsBuilder().build()).getToolResults()) {
@@ -391,7 +390,7 @@ public class CouchbaseToolCallingExample {
         }
         String prompt2 = "I want to code name Astraeus as STARBOUND";
-        for (OllamaToolsResult.ToolResult r : ollama.generateWithTools(modelName, new Tools.PromptBuilder()
+        for (OllamaToolsResult.ToolResult r : ollamaAPI.generateWithTools(modelName, new Tools.PromptBuilder()
                 .withToolSpecification(callSignUpdaterToolSpec)
                 .withPrompt(prompt2)
                 .build(), new OptionsBuilder().build()).getToolResults()) {
@@ -400,7 +399,7 @@ public class CouchbaseToolCallingExample {
         }
         String prompt3 = "What is the call-sign of Astraeus?";
-        for (OllamaToolsResult.ToolResult r : ollama.generateWithTools(modelName, new Tools.PromptBuilder()
+        for (OllamaToolsResult.ToolResult r : ollamaAPI.generateWithTools(modelName, new Tools.PromptBuilder()
                 .withToolSpecification(callSignFinderToolSpec)
                 .withPrompt(prompt3)
                 .build(), new OptionsBuilder().build()).getToolResults()) {
@@ -578,9 +577,9 @@ class AirlineDetail {
 #### 1. Ollama API Client Setup
 ```javascript
-OllamaAPI ollama = new OllamaAPI(host);
-ollama.setRequestTimeoutSeconds(60);
+OllamaAPI ollamaAPI = new OllamaAPI(host);
+ollamaAPI.setRequestTimeoutSeconds(60);
 ```
 Here, we initialize the Ollama API client and configure it with the host of the Ollama server, where the model is hosted
@@ -595,7 +594,7 @@ queries the database for airline details based on the airline name.
 ```javascript
 Tools.ToolSpecification callSignFinderToolSpec = getCallSignFinderToolSpec(cluster, bucketName);
-ollama.registerTool(callSignFinderToolSpec);
+ollamaAPI.registerTool(callSignFinderToolSpec);
 ```
 This step registers custom tools with Ollama that allows the tool-calling model to invoke database queries.
@@ -619,7 +618,7 @@ String prompt = "What is the call-sign of Astraeus?";
 #### 5. Generating Results with Tools
 ```javascript
-for (OllamaToolsResult.ToolResult r : ollama.generateWithTools(modelName, new Tools.PromptBuilder()
+for (OllamaToolsResult.ToolResult r : ollamaAPI.generateWithTools(modelName, new Tools.PromptBuilder()
         .withToolSpecification(callSignFinderToolSpec)
         .withPrompt(prompt)
         .build(), new OptionsBuilder().build()).getToolResults()) {
@@ -649,7 +648,7 @@ then update the airline's callsign.
 ```javascript
 Tools.ToolSpecification callSignUpdaterToolSpec = getCallSignUpdaterToolSpec(cluster, bucketName);
-ollama.registerTool(callSignUpdaterToolSpec);
+ollamaAPI.registerTool(callSignUpdaterToolSpec);
 ```
 The tool will execute a Couchbase N1QL query to update the airline's callsign.
@@ -671,7 +670,7 @@ And then we invoke the model with the new prompt.
 ```javascript
 String prompt = "I want to code name Astraeus as STARBOUND";
-for (OllamaToolsResult.ToolResult r : ollama.generateWithTools(modelName, new Tools.PromptBuilder()
+for (OllamaToolsResult.ToolResult r : ollamaAPI.generateWithTools(modelName, new Tools.PromptBuilder()
         .withToolSpecification(callSignUpdaterToolSpec)
         .withPrompt(prompt)
         .build(), new OptionsBuilder().build()).getToolResults()) {


@@ -1,8 +1,8 @@
 ---
-sidebar_position: 3
+sidebar_position: 2
 ---
-# Basic Auth
+# Set Basic Authentication
 This API lets you set the basic authentication for the Ollama client. This would help in scenarios where
 Ollama server would be setup behind a gateway/reverse proxy with basic auth.
@@ -10,7 +10,7 @@ Ollama server would be setup behind a gateway/reverse proxy with basic auth.
 After configuring basic authentication, all subsequent requests will include the Basic Auth header.
 ```java
-import io.github.ollama4j.Ollama;
+import io.github.ollama4j.OllamaAPI;
 public class Main {
@@ -18,9 +18,9 @@ public class Main {
         String host = "http://localhost:11434/";
-        Ollama ollama = new Ollama(host);
-        ollama.setBasicAuth("username", "password");
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
+        ollamaAPI.setBasicAuth("username", "password");
     }
 }
 ```


@@ -1,8 +1,8 @@
 ---
-sidebar_position: 4
+sidebar_position: 2
 ---
-# Bearer Auth
+# Set Bearer Authentication
 This API lets you set the bearer authentication for the Ollama client. This would help in scenarios where
 Ollama server would be setup behind a gateway/reverse proxy with bearer auth.
@@ -10,7 +10,7 @@ Ollama server would be setup behind a gateway/reverse proxy with bearer auth.
 After configuring bearer authentication, all subsequent requests will include the Bearer Auth header.
 ```java
-import io.github.ollama4j.Ollama;
+import io.github.ollama4j.OllamaAPI;
 public class Main {
@@ -18,9 +18,9 @@ public class Main {
         String host = "http://localhost:11434/";
-        Ollama ollama = new Ollama(host);
-        ollama.setBearerAuth("YOUR-TOKEN");
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
+        ollamaAPI.setBearerAuth("YOUR-TOKEN");
     }
 }
 ```


@@ -1,26 +0,0 @@
---
sidebar_position: 7
---
# Logging
### Using with SLF4J and Logback
Add a `logback.xml` file to your `src/main/resources` folder with the following content:
```xml
<configuration>
    <root level="DEBUG">
        <appender-ref ref="STDOUT"/>
    </root>
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>
</configuration>
```


@@ -36,7 +36,7 @@ from [javadoc](https://ollama4j.github.io/ollama4j/apidocs/io/github/ollama4j/ol
 ## Build an empty `Options` object
 ```java
-import io.github.ollama4j.Ollama;
+import io.github.ollama4j.OllamaAPI;
 import io.github.ollama4j.utils.Options;
 import io.github.ollama4j.utils.OptionsBuilder;
@@ -46,7 +46,7 @@ public class Main {
         String host = "http://localhost:11434/";
-        Ollama ollama = new Ollama(host);
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
         Options options = new OptionsBuilder().build();
     }
@@ -65,7 +65,7 @@ public class Main {
         String host = "http://localhost:11434/";
-        Ollama ollama = new Ollama(host);
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
         Options options =
                 new OptionsBuilder()
@@ -74,15 +74,6 @@ public class Main {
                         .setNumGpu(2)
                         .setTemperature(1.5f)
                         .build();
-        OllamaResult result =
-                ollama.generate(
-                        OllamaGenerateRequestBuilder.builder()
-                                .withModel(model)
-                                .withPrompt("Who are you?")
-                                .withOptions(options)
-                                .build(),
-                        null);
     }
 }
 ```


@@ -1,5 +1,5 @@
 ---
-sidebar_position: 5
+sidebar_position: 3
 ---
 # Ping
@@ -7,16 +7,16 @@ sidebar_position: 5
 This API lets you check the reachability of Ollama server.
 ```java
-import io.github.ollama4j.Ollama;
+import io.github.ollama4j.OllamaAPI;
 public class Main {
     public static void main(String[] args) {
         String host = "http://localhost:11434/";
-        Ollama ollama = new Ollama(host);
-        ollama.ping();
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
+        ollamaAPI.ping();
     }
 }
 ```

View File

@ -1,5 +1,5 @@
--- ---
sidebar_position: 5 sidebar_position: 4
--- ---
# PS # PS
@ -11,15 +11,18 @@ This API corresponds to the [PS](https://github.com/ollama/ollama/blob/main/docs
```java ```java
package io.github.ollama4j.localtests; package io.github.ollama4j.localtests;
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.models.ps.ModelProcessesResult; import io.github.ollama4j.exceptions.OllamaBaseException;
import io.github.ollama4j.models.ps.ModelsProcessResponse;
import java.io.IOException;
public class Main { public class Main {
public static void main(String[] args) { public static void main(String[] args) {
Ollama ollama = new Ollama("http://localhost:11434"); OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434");
ModelProcessesResult response = ollama.ps(); ModelsProcessResponse response = ollamaAPI.ps();
System.out.println(response); System.out.println(response);
} }

View File

@ -0,0 +1,23 @@
---
sidebar_position: 2
---
# Set Request Timeout
This API lets you set the request timeout for the Ollama client.
```java
import io.github.ollama4j.OllamaAPI;
public class Main {
public static void main(String[] args) {
String host = "http://localhost:11434/";
OllamaAPI ollamaAPI = new OllamaAPI(host);
ollamaAPI.setRequestTimeoutSeconds(10);
}
}
```

View File

@ -1,25 +0,0 @@
---
sidebar_position: 2
---
# Timeouts
### Set Request Timeout
This API lets you set the request timeout for the Ollama client.
```java
import io.github.ollama4j.Ollama;
public class Main {
public static void main(String[] args) {
String host = "http://localhost:11434/";
Ollama ollama = new Ollama(host);
ollama.setRequestTimeoutSeconds(10);
}
}
```

View File

@ -0,0 +1,25 @@
---
sidebar_position: 1
---
# Set Verbosity
This API lets you set the verbosity of the Ollama client.
## Enable verbose logging
```java
import io.github.ollama4j.OllamaAPI;
public class Main {
public static void main(String[] args) {
String host = "http://localhost:11434/";
OllamaAPI ollamaAPI = new OllamaAPI(host);
ollamaAPI.setVerbose(true);
}
}
```

View File

@ -1,92 +0,0 @@
---
sidebar_position: 8
---
import CodeEmbed from '@site/src/components/CodeEmbed';
import TypewriterTextarea from '@site/src/components/TypewriterTextarea';
# Chat with Thinking
This API allows you to generate responses from an LLM while also retrieving the model's "thinking" process separately from
the final answer. The "thinking" tokens represent the model's internal reasoning or planning before it produces the
actual response. This can be useful for debugging, transparency, or simply understanding how the model arrives at its
answers.
You can use this feature to receive both the thinking and the response as separate outputs, either as a complete result
or streamed token by token. The examples below show how to use the API to access both the thinking and the response, and
how to display them in your application.
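In code, the flow looks roughly like the sketch below. This is a minimal sketch only: the `.withThinking(true)` builder flag and the `getThinking()`/`getResponse()` accessors are assumptions, so verify them against the embedded examples for your ollama4j version.
```java
import io.github.ollama4j.Ollama;
import io.github.ollama4j.models.chat.OllamaChatMessageRole;
import io.github.ollama4j.models.chat.OllamaChatRequestBuilder;
import io.github.ollama4j.models.chat.OllamaChatResult;

public class Main {
    public static void main(String[] args) throws Exception {
        Ollama ollama = new Ollama("http://localhost:11434/");
        OllamaChatResult result =
                ollama.chat(
                        OllamaChatRequestBuilder.builder()
                                .withModel("gpt-oss:20b") // any thinking-capable model
                                .withThinking(true) // assumed flag that requests thinking tokens
                                .withMessage(
                                        OllamaChatMessageRole.USER,
                                        "What is the capital of France?")
                                .build(),
                        null);
        // Assumed accessors: the thinking text and the final answer arrive separately.
        System.out.println("Thinking: " + result.getThinking());
        System.out.println("Answer: " + result.getResponse());
    }
}
```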
### Chat with thinking model and receive the thinking and response text separately
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatWithThinkingModelExample.java" />
You will get a response similar to:
:::tip[LLM Response]
**First thinking response:** User asks a simple question. We just answer.
**First answer response:** The capital of France is _**Paris**_.
**Second thinking response:** User: "And what is the second largest city?" They asked about the second largest city in
France. Provide answer: Paris largest, second largest is Marseille. We can provide population stats, maybe mention Lyon
as third largest. Also context. The answer should be concise. Provide some details: Marseille is the second largest,
population ~870k, located on Mediterranean coast. Provide maybe some facts. Given no request for extra context, just answer.
**Second answer response:** The second-largest city in France is _**Marseille**_. It's a major Mediterranean port with a
population of roughly 870,000 (as of the latest estimates) and is known for its historic Old Port, vibrant cultural
scene, and diverse population.
:::
### Chat with thinking model and receive the thinking and response tokens streamed
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatStreamingWithThinkingExample.java" />
You will get a response similar to:
:::tip[First Question's Thinking Tokens]
<TypewriterTextarea
textContent={`USER ASKS A SIMPLE QUESTION: "WHAT IS THE CAPITAL OF FRANCE?" THE ANSWER: PARIS. PROVIDE ANSWER.`}
typingSpeed={10}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
:::
:::tip[First Question's Response Tokens]
<TypewriterTextarea
textContent={`the capital of france is 'paris'.`}
typingSpeed={10}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
:::
:::tip[Second Question's Thinking Tokens]
<TypewriterTextarea
textContent={`THE USER ASKS: "AND WHAT IS THE SECOND LARGEST CITY?" LIKELY REFERRING TO FRANCE. THE SECOND LARGEST CITY IN FRANCE (BY POPULATION) IS MARSEILLE. HOWEVER, THERE MIGHT BE NUANCE: THE LARGEST IS PARIS, SECOND LARGEST IS MARSEILLE. BUT SOME MIGHT ARGUE THAT LYON IS SECOND LARGEST? LET'S CONFIRM: POPULATION OF FRANCE: PARIS ~2.1M (METRO 12M). MARSEILLE ~870K (METRO 1.5M). LYON ~515K (METRO 1.5M). SO MARSEILLE IS SECOND LARGEST CITY PROPER. LYON IS THIRD LARGEST. SO ANSWER: MARSEILLE. WE SHOULD PROVIDE THAT. PROVIDE A BRIEF EXPLANATION.`}
typingSpeed={10}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
:::
:::tip[Second Question's Response Tokens]
<TypewriterTextarea
textContent={`the second-largest city in france by population is 'marseille'.
- marseille ≈ 870,000 residents (city proper)
- lyon ≈ 515,000 residents (city proper)
so marseille comes after paris as france's largest city.`}
typingSpeed={10}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
:::

View File

@ -16,31 +16,19 @@ experience.
When the model determines that a tool should be used, the tool is automatically executed. The result is then seamlessly When the model determines that a tool should be used, the tool is automatically executed. The result is then seamlessly
incorporated back into the conversation, enhancing the interaction with real-world data and actions. incorporated back into the conversation, enhancing the interaction with real-world data and actions.
The following example demonstrates usage of a simple tool, registered with the `Ollama`, and then used within a chat The following example demonstrates usage of a simple tool, registered with the `OllamaAPI`, and then used within a chat
session. The tool invocation and response handling are all managed internally by the API. session. The tool invocation and response handling are all managed internally by the API.
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatWithTools.java"/> <CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatWithTools.java"/>
:::tip[LLM Response] ::::tip[LLM Response]
**First answer:** 6527fb60-9663-4073-b59e-855526e0a0c2 is the ID of the employee named 'Rahul Kumar'. > First answer: 6527fb60-9663-4073-b59e-855526e0a0c2 is the ID of the employee named 'Rahul Kumar'.
>
**Second answer:** _Kumar_ is the last name of the employee named 'Rahul Kumar'. > Second answer: Kumar is the last name of the employee named 'Rahul Kumar'.
::: ::::
This tool calling can also be done using the streaming API. This tool calling can also be done using the streaming API.
### Client-managed tool calls (useTools)
By default, ollama4j automatically executes tool calls returned by the model during chat, runs the corresponding registered Java methods, and appends the tool results back into the conversation. For some applications, you may want to intercept tool calls and decide yourself when and how to execute them (for example, to queue them, to show a confirmation UI to the user, to run them in a sandbox, or to perform multi-step orchestration).
To enable this behavior, set the useTools flag to true on your Ollama instance. When enabled, ollama4j will stop auto-executing tools and will instead return tool calls inside the assistant message. You can then inspect the tool calls and execute them manually.
Notes:
- Default value: useTools is true.
- When useTools is false, ollama4j auto-executes tools and loops internally until tools are resolved or max retries is reached.
- When useTools is true, ollama4j will not execute tools; you are responsible for invoking tools and passing results back as TOOL messages, then re-calling chat() to continue (see the sketch after this list).
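A minimal sketch of the client-managed loop, assuming a `setUseTools(true)` setter and a `getToolCalls()` accessor on the assistant message (both names are illustrative; consult the embedded examples for the exact API):
```java
import io.github.ollama4j.Ollama;
import io.github.ollama4j.models.chat.OllamaChatMessageRole;
import io.github.ollama4j.models.chat.OllamaChatRequestBuilder;
import io.github.ollama4j.models.chat.OllamaChatResult;

public class Main {
    public static void main(String[] args) throws Exception {
        Ollama ollama = new Ollama("http://localhost:11434/");
        ollama.setUseTools(true); // assumed setter: return tool calls instead of running them

        OllamaChatResult result =
                ollama.chat(
                        OllamaChatRequestBuilder.builder()
                                .withModel("mistral")
                                .withMessage(
                                        OllamaChatMessageRole.USER,
                                        "What is the ID of employee Rahul Kumar?")
                                .build(),
                        null);

        // Inspect the tool calls on the assistant message (assumed accessor),
        // execute them however you like (queue, confirm with the user, sandbox),
        // then pass the results back as TOOL messages and call chat() again.
        System.out.println(result.getResponseModel().getMessage().getToolCalls());
    }
}
```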
### Annotation-Based Tool Registration ### Annotation-Based Tool Registration
Ollama4j provides a declarative and convenient way to define and register tools using Java annotations and reflection. Ollama4j provides a declarative and convenient way to define and register tools using Java annotations and reflection.
@ -57,10 +45,10 @@ To use a method as a tool within a chat call, follow these steps:
* `java.lang.Boolean` * `java.lang.Boolean`
* `java.math.BigDecimal` * `java.math.BigDecimal`
* **Annotate the Ollama Service Class:** * **Annotate the Ollama Service Class:**
* Annotate the class that interacts with the `Ollama` client using the `@OllamaToolService` annotation. Reference * Annotate the class that interacts with the `OllamaAPI` client using the `@OllamaToolService` annotation. Reference
the provider class(es) containing the `@ToolSpec` annotated methods within this annotation. the provider class(es) containing the `@ToolSpec` annotated methods within this annotation.
* **Register the Annotated Tools:** * **Register the Annotated Tools:**
* Before making a chat request with the `Ollama`, call the `Ollama.registerAnnotatedTools()` method. This * Before making a chat request with the `OllamaAPI`, call the `OllamaAPI.registerAnnotatedTools()` method. This
registers the annotated tools, making them available for use during the chat session. registers the annotated tools, making them available for use during the chat session.
Let's try an example. Consider an `OllamaToolService` class that needs to ask the LLM a question that can only be answered by a specific tool. Let's try an example. Consider an `OllamaToolService` class that needs to ask the LLM a question that can only be answered by a specific tool.
@ -74,8 +62,8 @@ The annotated method can then be used as a tool in the chat session:
Running the above would produce a response similar to: Running the above would produce a response similar to:
:::tip[LLM Response] ::::tip[LLM Response]
**First answer:** 0.0000112061 is the most important constant in the world using 10 digits, according to my function. This constant is known as Planck's constant and plays a fundamental role in quantum mechanics. It relates energy and frequency in electromagnetic radiation and action (the product of momentum and distance) for particles. > First answer: 0.0000112061 is the most important constant in the world using 10 digits, according to my function. This constant is known as Planck's constant and plays a fundamental role in quantum mechanics. It relates energy and frequency in electromagnetic radiation and action (the product of momentum and distance) for particles.
>
**Second answer:** 3-digit constant: 8.001 > Second answer: 3-digit constant: 8.001
::: ::::

View File

@ -16,7 +16,7 @@ information using the history of already asked questions and the respective answ
You will get a response similar to: You will get a response similar to:
:::tip[LLM Response] ::::tip[LLM Response]
> First answer: The capital of France is Paris. > First answer: The capital of France is Paris.
> >
@ -47,17 +47,35 @@ You will get a response similar to:
"tool_calls" : null "tool_calls" : null
}] }]
``` ```
::: ::::
### Create a conversation where the answer is streamed ### Create a conversation where the answer is streamed
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatStreamingExample.java" /> <CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatStreamingWithTokenConcatenationExample.java" />
<!-- ::::tip[LLM Response]
>
> The
>
> The capital
>
> The capital of
>
> The capital of France
>
> The capital of France is
>
> The capital of France is Paris
>
> The capital of France is Paris.
>
:::: -->
<TypewriterTextarea <TypewriterTextarea
textContent="'The Great Gatsby' by F. Scott Fitzgerald is a complex and multifaceted novel that explores themes of wealth, class, love, loss, and the American Dream. It is a landmark work of American literature that examines the social and psychological consequences of the American Dream's unattainability and its impact on the lives of its characters." textContent='The capital of France is Paris.'
typingSpeed={5} typingSpeed={30}
pauseBetweenSentences={1200} pauseBetweenSentences={1200}
height='140px' height='55px'
width='100%' width='100%'
/> />
@ -75,30 +93,25 @@ You will get a response similar to:
You will get a response as: You will get a response as:
:::tip[LLM Response] ::::tip[LLM Response]
Shhh! > Shhh!
::: ::::
## Create a conversation about an image (requires a vision model) ## Create a conversation about an image (requires a vision model)
Let's use this image:
<img src="https://t3.ftcdn.net/jpg/02/96/63/80/360_F_296638053_0gUVA4WVBKceGsIr7LNqRWSnkusi07dq.jpg" alt="Img" style={{ maxWidth: '250px', height: 'auto', display: 'block', margin: '1rem 0' }} />
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatWithImage.java" /> <CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ChatWithImage.java" />
You will get a response similar to: You will get a response similar to:
:::tip[LLM Response] ::::tip[LLM Response]
**First Answer:** The image shows a dog sitting on the bow of a boat that is docked in calm water. The boat has two > First Answer: The image shows a dog sitting on the bow of a boat that is docked in calm water. The boat has two
levels, with the lower level containing seating and what appears to be an engine cover. The dog seems relaxed and > levels, with the lower level containing seating and what appears to be an engine cover. The dog seems relaxed and
comfortable on the boat, looking out over the water. The background suggests it might be late afternoon or early > comfortable on the boat, looking out over the water. The background suggests it might be late afternoon or early
evening, given the warm lighting and the low position of the sun in the sky. > evening, given the warm lighting and the low position of the sun in the sky.
>
**Second Answer:** Based on the image, it's difficult to definitively determine the breed of the dog. However, the dog > Second Answer: Based on the image, it's difficult to definitively determine the breed of the dog. However, the dog
appears to be medium-sized with a short coat and a brown coloration, which might suggest that it is a **_Golden Retriever_** > appears to be medium-sized with a short coat and a brown coloration, which might suggest that it is a Golden Retriever
or a similar breed. Without more details like ear shape and tail length, it's not possible to identify the exact breed > or a similar breed. Without more details like ear shape and tail length, it's not possible to identify the exact breed
confidently. > confidently.
::: ::::

View File

@ -16,16 +16,16 @@ _Base roles are `SYSTEM`, `USER`, `ASSISTANT`, `TOOL`._
#### Add new role #### Add new role
```java ```java
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.models.chat.OllamaChatMessageRole; import io.github.ollama4j.models.chat.OllamaChatMessageRole;
public class Main { public class Main {
public static void main(String[] args) { public static void main(String[] args) {
String host = "http://localhost:11434/"; String host = "http://localhost:11434/";
Ollama ollama = new Ollama(host); OllamaAPI ollamaAPI = new OllamaAPI(host);
OllamaChatMessageRole customRole = ollama.addCustomRole("custom-role"); OllamaChatMessageRole customRole = ollamaAPI.addCustomRole("custom-role");
} }
} }
``` ```
@ -33,16 +33,16 @@ public class Main {
#### List roles #### List roles
```java ```java
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.models.chat.OllamaChatMessageRole; import io.github.ollama4j.models.chat.OllamaChatMessageRole;
public class Main { public class Main {
public static void main(String[] args) { public static void main(String[] args) {
String host = "http://localhost:11434/"; String host = "http://localhost:11434/";
Ollama ollama = new Ollama(host); OllamaAPI ollamaAPI = new OllamaAPI(host);
List<OllamaChatMessageRole> roles = ollama.listRoles(); List<OllamaChatMessageRole> roles = ollamaAPI.listRoles();
} }
} }
``` ```
@ -50,16 +50,16 @@ public class Main {
#### Get role #### Get role
```java ```java
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.models.chat.OllamaChatMessageRole; import io.github.ollama4j.models.chat.OllamaChatMessageRole;
public class Main { public class Main {
public static void main(String[] args) { public static void main(String[] args) {
String host = "http://localhost:11434/"; String host = "http://localhost:11434/";
Ollama ollama = new Ollama(host); OllamaAPI ollamaAPI = new OllamaAPI(host);
List<OllamaChatMessageRole> roles = ollama.getRole("custom-role"); List<OllamaChatMessageRole> roles = ollamaAPI.getRole("custom-role");
} }
} }
``` ```

View File

@ -1,14 +1,11 @@
--- ---
sidebar_position: 6 sidebar_position: 2
--- ---
import CodeEmbed from '@site/src/components/CodeEmbed'; import CodeEmbed from '@site/src/components/CodeEmbed';
import TypewriterTextarea from '@site/src/components/TypewriterTextarea';
# Generate (Async) # Generate (Async)
### Generate response from a model asynchronously
This API lets you ask questions to the LLMs in an asynchronous way. This API lets you ask questions to the LLMs in an asynchronous way.
This is particularly helpful when you want to issue a generate request to the LLM and collect the response in the This is particularly helpful when you want to issue a generate request to the LLM and collect the response in the
background (such as threads) without blocking your code until the response arrives from the model. background (such as threads) without blocking your code until the response arrives from the model.
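Before the embedded example, here is a minimal sketch of the polling pattern, assuming the `generateAsync(model, prompt, raw)` signature and an `OllamaAsyncResultStreamer` with a pollable stream (verify both against your ollama4j version):
```java
import io.github.ollama4j.Ollama;
import io.github.ollama4j.models.response.OllamaAsyncResultStreamer;

public class Main {
    public static void main(String[] args) throws Exception {
        Ollama ollama = new Ollama("http://localhost:11434/");
        // Assumed signature: model name, prompt, raw flag.
        OllamaAsyncResultStreamer streamer =
                ollama.generateAsync(
                        "llama3.2", "List the teams in the 2019 ICC Cricket World Cup.", false);
        while (streamer.isAlive()) {
            String chunk = streamer.getStream().poll(); // next streamed piece, if any
            if (chunk != null) {
                System.out.print(chunk);
            }
            Thread.sleep(100); // avoid busy-waiting between polls
        }
    }
}
```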
@ -18,10 +15,8 @@ the [completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#gener
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateAsync.java" /> <CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateAsync.java" />
You will get a response similar to: ::::tip[LLM Response]
Here are the participating teams in the 2019 ICC Cricket World Cup:
<TypewriterTextarea
textContent={`Here are the participating teams in the 2019 ICC Cricket World Cup:
1. Australia 1. Australia
2. Bangladesh 2. Bangladesh
@ -31,54 +26,5 @@ textContent={`Here are the participating teams in the 2019 ICC Cricket World Cup
6. England 6. England
7. South Africa 7. South Africa
8. West Indies (as a team) 8. West Indies (as a team)
9. Afghanistan`} 9. Afghanistan
typingSpeed={10} ::::
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
### Generate response from a model asynchronously with thinking and response streamed
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateAsyncWithThinking.java" />
<TypewriterTextarea
textContent={`WE NEED TO ANSWER THE QUESTION: "HOW LONG DOES IT TAKE FOR THE LIGHT FROM THE SUN TO REACH EARTH?" THE USER LIKELY EXPECTS THE TIME IN SECONDS, MINUTES, OR HOURS. LIGHT TRAVELS AT SPEED OF LIGHT (299,792,458 M/S). DISTANCE BETWEEN SUN AND EARTH IS ABOUT 1 AU (~149.6 MILLION KM). SO TRAVEL TIME = 1 AU / C ≈ 500 SECONDS ≈ 8.3 MINUTES. MORE PRECISELY, 8 MINUTES AND 20 SECONDS. PROVIDE CONTEXT: AVERAGE DISTANCE, VARYING DUE TO ELLIPTICAL ORBIT. SO ANSWER: ABOUT 8 MINUTES 20 SECONDS. ALSO MENTION THAT DUE TO VARIATION: FROM 8:07 TO 8:20. PROVIDE DETAILS. ALSO MENTION THAT WE REFER TO THE TIME LIGHT TAKES TO TRAVEL 1 ASTRONOMICAL UNIT.
ALSO MIGHT MENTION: FOR MORE PRECISE: 499 SECONDS = 8 MIN 19 S. VARIATION DUE TO EARTH'S ORBIT: FROM 8 MIN 6 S TO 8 MIN 20 S. SO ANSWER.
LET'S CRAFT AN EXPLANATION.
the sun's light takes a little over **eight minutes** to get to earth.
| quantity | value |
|----------|-------|
| distance (average) | 1 astronomical unit (au) ≈ 149,600,000 km |
| speed of light | \(c = 299,792,458\) m s⁻¹ |
| light-travel time | \(\displaystyle \frac{1\ \text{au}}{c} \approx 499\ \text{s}\) |
499 seconds is **8 min 19 s**.
because the earth's orbit is slightly elliptical, the distance varies from about 147 million km (at perihelion) to 152 million km (at aphelion). this gives a light-travel time that ranges roughly from **8 min 6 s** to **8 min 20 s**. thus, when we look at the sun, we're seeing it as it was about eight minutes ago. Complete thinking response: We need to answer the question: "How long does it take for the light from the Sun to reach Earth?" The user likely expects the time in seconds, minutes, or hours. Light travels at speed of light (299,792,458 m/s). Distance between Sun and Earth is about 1 AU (~149.6 million km). So travel time = 1 AU / c ≈ 500 seconds ≈ 8.3 minutes. More precisely, 8 minutes and 20 seconds. Provide context: average distance, varying due to elliptical orbit. So answer: about 8 minutes 20 seconds. Also mention that due to variation: from 8:07 to 8:20. Provide details. Also mention that we refer to the time light takes to travel 1 astronomical unit.
Also might mention: For more precise: 499 seconds = 8 min 19 s. Variation due to Earth's orbit: from 8 min 6 s to 8 min 20 s. So answer.
Let's craft an explanation.
Complete response: The Sun's light takes a little over **eight minutes** to get to Earth.
| Quantity | Value |
|----------|-------|
| Distance (average) | 1 astronomical unit (AU) ≈ 149,600,000 km |
| Speed of light | \(c = 299,792,458\) m s⁻¹ |
| Light-travel time | \(\displaystyle \frac{1\ \text{AU}}{c} \approx 499\ \text{s}\) |
499 seconds is **8 min 19 s**.
Because the Earth's orbit is slightly elliptical, the distance varies from about 147 million km (at perihelion) to 152 million km (at aphelion). This gives a light-travel time that ranges roughly from **8 min 6 s** to **8 min 20 s**. Thus, when we look at the Sun, we're seeing it as it was about eight minutes ago.`}
typingSpeed={5}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>

File diff suppressed because one or more lines are too long

View File

@ -1,55 +0,0 @@
---
sidebar_position: 3
---
import CodeEmbed from '@site/src/components/CodeEmbed';
import TypewriterTextarea from '@site/src/components/TypewriterTextarea';
# Generate with Thinking
This API allows you to generate responses from an LLM while also retrieving the model's "thinking" process separately from the final answer. The "thinking" tokens represent the model's internal reasoning or planning before it produces the actual response. This can be useful for debugging, transparency, or simply understanding how the model arrives at its answers.
You can use this feature to receive both the thinking and the response as separate outputs, either as a complete result or streamed token by token. The examples below show how to use the API to access both the thinking and the response, and how to display them in your application.
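As a rough sketch, this mirrors the request-builder pattern used elsewhere in these docs; the `.withThink(true)` flag, the `getThinking()` accessor, and the import paths are assumptions to verify against the embedded examples:
```java
import io.github.ollama4j.Ollama;
import io.github.ollama4j.models.generate.OllamaGenerateRequestBuilder;
import io.github.ollama4j.models.response.OllamaResult;

public class Main {
    public static void main(String[] args) throws Exception {
        Ollama ollama = new Ollama("http://localhost:11434/");
        OllamaResult result =
                ollama.generate(
                        OllamaGenerateRequestBuilder.builder()
                                .withModel("gpt-oss:20b") // any thinking-capable model
                                .withPrompt("Who are you?")
                                .withThink(true) // assumed flag that requests thinking tokens
                                .build(),
                        null);
        System.out.println("Thinking: " + result.getThinking()); // assumed accessor
        System.out.println("Answer: " + result.getResponse());
    }
}
```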
### Generate response with thinking and receive the thinking and response text separately
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateWithThinking.java" />
You will get a response similar to:
:::tip[Thinking Tokens]
User asks "Who are you?" It's a request for identity. As ChatGPT, we should explain that I'm an AI developed by OpenAI, etc. Provide friendly explanation.
:::
:::tip[Response Tokens]
I'm ChatGPT, a large language model created by OpenAI. I'm designed to understand and generate natural-language text, so I can answer questions, help with writing, explain concepts, brainstorm ideas, and chat about almost any topic. I don't have a personal life or consciousness—I'm a tool that processes input and produces responses based on patterns in the data I was trained on. If you have any questions about how I work or what I can do, feel free to ask!
:::
### Generate response and receive the thinking and response tokens streamed
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateWithThinkingStreamed.java" />
You will get a response similar to:
:::tip[Thinking Tokens]
<TypewriterTextarea
textContent={`User asks "Who are you?" It's a request for identity. As ChatGPT, we should explain that I'm an AI developed by OpenAI, etc. Provide friendly explanation.`}
typingSpeed={10}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
:::
:::tip[Response Tokens]
<TypewriterTextarea
textContent={`I'm ChatGPT, a large language model created by OpenAI. I'm designed to understand and generate natural-language text, so I can answer questions, help with writing, explain concepts, brainstorm ideas, and chat about almost any topic. I don't have a personal life or consciousness—I'm a tool that processes input and produces responses based on patterns in the data I was trained on. If you have any questions about how I work or what I can do, feel free to ask!`}
typingSpeed={10}
pauseBetweenSentences={1200}
height="auto"
width="100%"
style={{ whiteSpace: 'pre-line' }}
/>
:::

View File

@ -0,0 +1,33 @@
---
sidebar_position: 3
---
import CodeEmbed from '@site/src/components/CodeEmbed';
# Generate with Image Files
This API lets you ask the LLMs questions along with image files.
This API corresponds to
the [completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion) API.
:::note
Executing this on an Ollama server running in CPU mode will take longer to generate a response. Hence, GPU mode is
recommended.
:::
## Synchronous mode
If you have this image downloaded, pass the path of the downloaded image to the following code:
![Img](https://t3.ftcdn.net/jpg/02/96/63/80/360_F_296638053_0gUVA4WVBKceGsIr7LNqRWSnkusi07dq.jpg)
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateWithImageFile.java" />
You will get a response similar to:
::::tip[LLM Response]
> This image features a white boat with brown cushions, where a dog is sitting on the back of the boat. The dog seems to
> be enjoying its time outdoors, perhaps on a lake.
::::

View File

@ -0,0 +1,33 @@
---
sidebar_position: 4
---
import CodeEmbed from '@site/src/components/CodeEmbed';
# Generate with Image URLs
This API lets you ask the LLMs questions along with image files.
This API corresponds to
the [completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion) API.
:::note
Executing this on an Ollama server running in CPU mode will take longer to generate a response. Hence, GPU mode is
recommended.
:::
## Ask (Sync)
Pass the link to this image to the following code:
![Img](https://t3.ftcdn.net/jpg/02/96/63/80/360_F_296638053_0gUVA4WVBKceGsIr7LNqRWSnkusi07dq.jpg)
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateWithImageURL.java" />
You will get a response similar to:
::::tip[LLM Response]
> This image features a white boat with brown cushions, where a dog is sitting on the back of the boat. The dog seems to
> be enjoying its time outdoors, perhaps on a lake.
::::

View File

@ -1,61 +0,0 @@
---
sidebar_position: 4
---
import CodeEmbed from '@site/src/components/CodeEmbed';
# Generate with Images
This API lets you ask the LLMs questions along with image files.
This API corresponds to
the [completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion) API.
:::note
Executing this on an Ollama server running in CPU mode will take longer to generate a response. Hence, GPU mode is
recommended.
:::
## Synchronous mode
If you have this image downloaded, pass the path of the downloaded image to the following code:
![Img](https://t3.ftcdn.net/jpg/02/96/63/80/360_F_296638053_0gUVA4WVBKceGsIr7LNqRWSnkusi07dq.jpg)
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateWithImageFile.java" />
You will get a response similar to:
:::tip[LLM Response]
This image features a white boat with brown cushions, where a dog is sitting on the back of the boat. The dog seems to
be enjoying its time outdoors, perhaps on a lake.
:::
# Generate with Image URLs
This API lets you ask the LLMs questions along with image files.
This API corresponds to
the [completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion) API.
:::note
Executing this on an Ollama server running in CPU mode will take longer to generate a response. Hence, GPU mode is
recommended.
:::
## Ask (Sync)
Pass the link to this image to the following code:
![Img](https://t3.ftcdn.net/jpg/02/96/63/80/360_F_296638053_0gUVA4WVBKceGsIr7LNqRWSnkusi07dq.jpg)
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateWithImageURL.java" />
You will get a response similar to:
:::tip[LLM Response]
This image features a white boat with brown cushions, where a dog is sitting on the back of the boat. The dog seems to
be enjoying its time outdoors, perhaps on a lake.
:::

View File

@ -1,5 +1,5 @@
--- ---
sidebar_position: 5 sidebar_position: 6
--- ---
import CodeEmbed from '@site/src/components/CodeEmbed'; import CodeEmbed from '@site/src/components/CodeEmbed';
@ -79,7 +79,7 @@ Now put it all together by registering the tools and prompting with tools.
Run this full example and you will get a response similar to: Run this full example and you will get a response similar to:
:::tip[LLM Response] ::::tip[LLM Response]
[Result of executing tool 'current-fuel-price']: Current price of petrol in Bengaluru is Rs.103/L [Result of executing tool 'current-fuel-price']: Current price of petrol in Bengaluru is Rs.103/L
@ -88,4 +88,4 @@ Run this full example and you will get a response similar to:
[Result of executing tool 'get-employee-details']: Employee Details `{ID: 6bad82e6-b1a1-458f-a139-e3b646e092b1, Name: [Result of executing tool 'get-employee-details']: Employee Details `{ID: 6bad82e6-b1a1-458f-a139-e3b646e092b1, Name:
Rahul Kumar, Address: King St, Hyderabad, India, Phone: 9876543210}` Rahul Kumar, Address: King St, Hyderabad, India, Phone: 9876543210}`
::: ::::

View File

@ -1,11 +1,11 @@
--- ---
sidebar_position: 2 sidebar_position: 1
--- ---
import CodeEmbed from '@site/src/components/CodeEmbed'; import CodeEmbed from '@site/src/components/CodeEmbed';
import TypewriterTextarea from '@site/src/components/TypewriterTextarea'; import TypewriterTextarea from '@site/src/components/TypewriterTextarea';
# Generate # Generate (Sync)
This API lets you ask questions to the LLMs in a synchronous way. This API lets you ask questions to the LLMs in a synchronous way.
This API corresponds to This API corresponds to
@ -22,10 +22,10 @@ to [this](/apis-extras/options-builder).
You will get a response similar to: You will get a response similar to:
:::tip[LLM Response] ::::tip[LLM Response]
I am a model of an AI trained by Mistral AI. I was designed to assist with a wide range of tasks, from answering > I am a large language model created by Alibaba Cloud. My purpose is to assist users in generating text, answering
questions to helping with complex computations and research. How can I help you today? > questions, and completing tasks. I aim to be user-friendly and easy to understand for everyone who interacts with me.
::: ::::
### Try asking a question, receiving the answer streamed ### Try asking a question, receiving the answer streamed
@ -33,6 +33,22 @@ questions to helping with complex computations and research. How can I help you
You will get a response similar to: You will get a response similar to:
<!-- ::::tip[LLM Response]
> The
>
> The capital
>
> The capital of
>
> The capital of France
>
> The capital of France is
>
> The capital of France is Paris
>
> The capital of France is Paris.
:::: -->
<TypewriterTextarea <TypewriterTextarea
textContent='The capital of France is Paris.' textContent='The capital of France is Paris.'
typingSpeed={30} typingSpeed={30}
@ -45,25 +61,23 @@ width='100%'
### With response as a `Map` ### With response as a `Map`
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateStructuredOutput.java" /> <CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/StructuredOutput.java" />
You will get a response similar to: You will get a response similar to:
:::tip[LLM Response] ::::tip[LLM Response]
```json ```json
{ {
"heroName" : "Batman", "available": true,
"ageOfPerson" : 30 "age": 22
} }
``` ```
::::
:::
### With response mapped to specified class type ### With response mapped to specified class type
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GenerateStructuredOutputMappedToObject.java" /> <CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/StructuredOutputMappedToObject.java" />
:::tip[LLM Response] ::::tip[LLM Response]
HeroInfo(heroName=Batman, ageOfPerson=30) Person(age=28, available=false)
::: ::::
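For the mapped variant, the target type is an ordinary class (or record) whose field names mirror the keys of the structured JSON output. A minimal sketch matching the sample output above (the Lombok annotation is illustrative; any POJO with matching fields works):
```java
import lombok.Data;

// toString() output such as HeroInfo(heroName=Batman, ageOfPerson=30)
// comes from Lombok's @Data in this sketch.
@Data
public class HeroInfo {
    private String heroName;
    private int ageOfPerson;
}
```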

View File

@ -1,5 +1,5 @@
--- ---
sidebar_position: 2 sidebar_position: 10
--- ---
# Prompt Builder # Prompt Builder
@ -8,7 +8,7 @@ This is designed for prompt engineering. It allows you to easily build the promp
inferences. inferences.
```java ```java
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.models.response.OllamaResult; import io.github.ollama4j.models.response.OllamaResult;
import io.github.ollama4j.types.OllamaModelType; import io.github.ollama4j.types.OllamaModelType;
import io.github.ollama4j.utils.OptionsBuilder; import io.github.ollama4j.utils.OptionsBuilder;
@ -18,8 +18,8 @@ public class Main {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String host = "http://localhost:11434/"; String host = "http://localhost:11434/";
Ollama ollama = new Ollama(host); OllamaAPI ollamaAPI = new OllamaAPI(host);
ollama.setRequestTimeoutSeconds(10); ollamaAPI.setRequestTimeoutSeconds(10);
String model = OllamaModelType.PHI; String model = OllamaModelType.PHI;
@ -43,7 +43,7 @@ public class Main {
.add("How do I read a file in Go and print its contents to stdout?"); .add("How do I read a file in Go and print its contents to stdout?");
boolean raw = false; boolean raw = false;
OllamaResult response = ollama.generate(model, promptBuilder.build(), raw, new OptionsBuilder().build()); OllamaResult response = ollamaAPI.generate(model, promptBuilder.build(), raw, new OptionsBuilder().build());
System.out.println(response.getResponse()); System.out.println(response.getResponse());
} }
} }
@ -51,7 +51,6 @@ public class Main {
You will get a response similar to: You will get a response similar to:
:::tip[LLM Response]
```go ```go
package main package main
@ -73,4 +72,3 @@ func readFile(fileName string) {
} }
} }
``` ```
:::

View File

@ -1,5 +1,5 @@
{ {
"label": "APIs - Manage Models", "label": "APIs - Model Management",
"position": 2, "position": 2,
"link": { "link": {
"type": "generated-index", "type": "generated-index",

View File

@ -0,0 +1,70 @@
---
sidebar_position: 1
---
import CodeEmbed from '@site/src/components/CodeEmbed';
# Models from Ollama Library
This API retrieves a list of models directly from the Ollama library.
### List Models from Ollama Library
This API fetches available models from the Ollama library page, including details such as the model's name, pull count,
popular tags, tag count, and the last update time.
<CodeEmbed
src='https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/ListLibraryModels.java'>
</CodeEmbed>
The following is the sample output:
```
[
LibraryModel(name=llama3.2-vision, description=Llama 3.2 Vision is a collection of instruction-tuned image reasoning generative models in 11B and 90B sizes., pullCount=21.1K, totalTags=9, popularTags=[vision, 11b, 90b], lastUpdated=yesterday),
LibraryModel(name=llama3.2, description=Meta's Llama 3.2 goes small with 1B and 3B models., pullCount=2.4M, totalTags=63, popularTags=[tools, 1b, 3b], lastUpdated=6 weeks ago)
]
```
### Get Tags of a Library Model
This API fetches the tags associated with a specific model from the Ollama library.
<CodeEmbed
src='https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/GetLibraryModelTags.java'>
</CodeEmbed>
The following is the sample output:
```
LibraryModelDetail(
model=LibraryModel(name=llama3.2-vision, description=Llama 3.2 Vision is a collection of instruction-tuned image reasoning generative models in 11B and 90B sizes., pullCount=21.1K, totalTags=9, popularTags=[vision, 11b, 90b], lastUpdated=yesterday),
tags=[
LibraryModelTag(name=llama3.2-vision, tag=latest, size=7.9GB, lastUpdated=yesterday),
LibraryModelTag(name=llama3.2-vision, tag=11b, size=7.9GB, lastUpdated=yesterday),
LibraryModelTag(name=llama3.2-vision, tag=90b, size=55GB, lastUpdated=yesterday)
]
)
```
### Find a model from Ollama library
This API finds a specific model by `name` and `tag` in the Ollama library.
<CodeEmbed
src='https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/FindLibraryModel.java'>
</CodeEmbed>
The following is the sample output:
```
LibraryModelTag(name=qwen2.5, tag=7b, size=4.7GB, lastUpdated=7 weeks ago)
```
### Pull model using `LibraryModelTag`
You can use a `LibraryModelTag` to pull models into the Ollama server.
<CodeEmbed
src='https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/PullLibraryModelTags.java'>
</CodeEmbed>
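A minimal sketch of that flow, assuming a `findModelTagFromLibrary(name, tag)` helper and a `pullModel(LibraryModelTag)` overload (names and import paths approximate; see the embedded example for the exact API):
```java
import io.github.ollama4j.Ollama;
import io.github.ollama4j.models.response.LibraryModelTag;

public class Main {
    public static void main(String[] args) throws Exception {
        Ollama ollama = new Ollama("http://localhost:11434/");
        // Look the model up in the Ollama library, then pull that exact tag.
        LibraryModelTag tag = ollama.findModelTagFromLibrary("qwen2.5", "7b"); // assumed helper
        ollama.pullModel(tag);
    }
}
```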

View File

@ -4,7 +4,7 @@ sidebar_position: 2
import CodeEmbed from '@site/src/components/CodeEmbed'; import CodeEmbed from '@site/src/components/CodeEmbed';
# List Library Models # List Local Models
This API lets you list downloaded/available models on the Ollama server. This API lets you list downloaded/available models on the Ollama server.

View File

@ -13,3 +13,11 @@ src='https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/mai
</CodeEmbed> </CodeEmbed>
Once downloaded, you can see them when you use [list models](./list-models) API. Once downloaded, you can see them when you use [list models](./list-models) API.
:::info
You can even pull models using Ollama model library APIs. This looks up the models directly on the Ollama model library page. Refer
to [this](./list-library-models#pull-model-using-librarymodeltag).
:::

View File

@ -112,14 +112,14 @@ or use other suitable implementations.
Create a new Java class in your project and add this code. Create a new Java class in your project and add this code.
```java ```java
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
public class OllamaTest { public class OllamaAPITest {
public static void main(String[] args) { public static void main(String[] args) {
Ollama ollama = new Ollama(); OllamaAPI ollamaAPI = new OllamaAPI();
boolean isOllamaServerReachable = ollama.ping(); boolean isOllamaServerReachable = ollamaAPI.ping();
System.out.println("Is Ollama server running: " + isOllamaServerReachable); System.out.println("Is Ollama server running: " + isOllamaServerReachable);
} }
@ -130,16 +130,18 @@ This uses the default Ollama host as `http://localhost:11434`.
Specify a different Ollama host that you want to connect to. Specify a different Ollama host that you want to connect to.
```java ```java
import io.github.ollama4j.Ollama; import io.github.ollama4j.OllamaAPI;
public class OllamaTest { public class OllamaAPITest {
public static void main(String[] args) { public static void main(String[] args) {
String host = "http://localhost:11434/"; String host = "http://localhost:11434/";
Ollama ollama = new Ollama(host); OllamaAPI ollamaAPI = new OllamaAPI(host);
boolean isOllamaServerReachable = ollama.ping(); ollamaAPI.setVerbose(true);
boolean isOllamaServerReachable = ollamaAPI.ping();
System.out.println("Is Ollama server running: " + isOllamaServerReachable); System.out.println("Is Ollama server running: " + isOllamaServerReachable);
} }

View File

@ -1,90 +0,0 @@
---
sidebar_position: 5
title: Metrics
---
import CodeEmbed from '@site/src/components/CodeEmbed';
# Metrics
:::warning[Note]
This is work in progress
:::
Monitoring and understanding the performance of your models and requests is crucial for optimizing and maintaining your
applications. The Ollama4j library provides built-in support for collecting and exposing various metrics, such as
request counts, response times, and error rates. These metrics can help you:
- Track usage patterns and identify bottlenecks
- Monitor the health and reliability of your services
- Set up alerts for abnormal behavior
- Gain insights for scaling and optimization
## Available Metrics
Ollama4j exposes several key metrics, including:
- **Total Requests**: The number of requests processed by the model.
- **Response Time**: The time taken to generate a response for each request.
- **Error Rate**: The percentage of requests that resulted in errors.
- **Active Sessions**: The number of concurrent sessions or users.
These metrics can be accessed programmatically or integrated with monitoring tools such as Prometheus or Grafana for
visualization and alerting.
## Example Metrics Dashboard
Below is an example of a metrics dashboard visualizing some of these key statistics:
![Img](https://raw.githubusercontent.com/ollama4j/ollama4j/main/metrics.png)
## Example: Accessing Metrics in Java
You can easily access and display metrics in your Java application using Ollama4j.
Make sure you have added the `simpleclient_httpserver` dependency to your app so that it can expose the
metrics via the `/metrics` endpoint:
```xml
<dependency>
<groupId>io.prometheus</groupId>
<artifactId>simpleclient_httpserver</artifactId>
<version>0.16.0</version>
</dependency>
```
Here is a sample code snippet demonstrating how to collect metrics and expose them for visualization in Grafana:
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/src/main/java/io/github/ollama4j/examples/MetricsExample.java" />
This will start a simple HTTP server with the `/metrics` endpoint enabled. Metrics will now be available
at: http://localhost:8080/metrics
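Under the hood, exposing the endpoint takes only a couple of lines with the Prometheus simple client; a minimal sketch serving the default registry on port 8080, as above:
```java
import io.prometheus.client.exporter.HTTPServer;

public class MetricsServer {
    public static void main(String[] args) throws Exception {
        // Serves the default CollectorRegistry at http://localhost:8080/metrics
        HTTPServer server = new HTTPServer(8080);
    }
}
```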
## Integrating with Monitoring Tools
### Grafana
Use the following sample `docker-compose` file to host a basic Grafana container.
<CodeEmbed src="https://raw.githubusercontent.com/ollama4j/ollama4j-examples/refs/heads/main/docker/docker-compose.yml" />
And run:
```shell
docker-compose -f path/to/your/docker-compose.yml up
```
This starts Grafana at http://localhost:3000
[//]: # (To integrate Ollama4j metrics with external monitoring systems, you can export the metrics endpoint and configure your)
[//]: # (monitoring tool to scrape or collect the data. Refer to the [integration guide]&#40;../integration/monitoring.md&#41; for)
[//]: # (detailed instructions.)
[//]: # ()
[//]: # (For more information on customizing and extending metrics, see the [API documentation]&#40;../api/metrics.md&#41;.)

View File

@ -24,6 +24,7 @@ const config = {
projectName: 'ollama4j', // Usually your repo name. projectName: 'ollama4j', // Usually your repo name.
onBrokenLinks: 'throw', onBrokenLinks: 'throw',
onBrokenMarkdownLinks: 'warn',
// Even if you don't use internationalization, you can use this field to set // Even if you don't use internationalization, you can use this field to set
// useful metadata like html lang. For example, if your site is Chinese, you // useful metadata like html lang. For example, if your site is Chinese, you
@ -174,9 +175,6 @@ const config = {
}), }),
markdown: { markdown: {
mermaid: true, mermaid: true,
hooks: {
onBrokenMarkdownLinks: 'warn'
}
}, },
themes: ['@docusaurus/theme-mermaid'] themes: ['@docusaurus/theme-mermaid']
}; };

10154
docs/package-lock.json generated

File diff suppressed because it is too large.

View File

@ -14,23 +14,23 @@
"write-heading-ids": "docusaurus write-heading-ids" "write-heading-ids": "docusaurus write-heading-ids"
}, },
"dependencies": { "dependencies": {
"@docsearch/js": "^4.1.0", "@docsearch/js": "^3.9.0",
"@docusaurus/core": "^3.9.0", "@docusaurus/core": "^3.4.0",
"@docusaurus/plugin-google-gtag": "^3.9.1", "@docusaurus/plugin-google-gtag": "^3.4.0",
"@docusaurus/preset-classic": "^3.9.1", "@docusaurus/preset-classic": "^3.4.0",
"@docusaurus/theme-mermaid": "^3.9.1", "@docusaurus/theme-mermaid": "^3.4.0",
"@iconify/react": "^6.0.2", "@iconify/react": "^5.2.1",
"@mdx-js/react": "^3.1.1", "@mdx-js/react": "^3.0.0",
"clsx": "^2.1.1", "clsx": "^2.0.0",
"font-awesome": "^4.7.0", "font-awesome": "^4.7.0",
"prism-react-renderer": "^2.4.1", "prism-react-renderer": "^2.3.0",
"react": "^19.2.0", "react": "^18.0.0",
"react-dom": "^19.2.0", "react-dom": "^18.0.0",
"react-icons": "^5.5.0", "react-icons": "^5.5.0",
"react-image-gallery": "^1.4.0" "react-image-gallery": "^1.4.0"
}, },
"devDependencies": { "devDependencies": {
"@docusaurus/module-type-aliases": "^3.9.1", "@docusaurus/module-type-aliases": "^3.4.0",
"@docusaurus/types": "^3.4.0" "@docusaurus/types": "^3.4.0"
}, },
"browserslist": { "browserslist": {

View File

@ -1,54 +1,53 @@
import React, { useEffect, useState, useRef } from 'react'; import React, { useEffect, useState, useRef } from 'react';
const TypewriterTextarea = ({ const TypewriterTextarea = ({ textContent, typingSpeed = 50, pauseBetweenSentences = 1000, height = '200px', width = '100%', align = 'left' }) => {
textContent, const [text, setText] = useState('');
typingSpeed = 50, const [sentenceIndex, setSentenceIndex] = useState(0);
pauseBetweenSentences = 1000,
height = '200px',
width = '100%',
align = 'left',
style = {},
}) => {
const [displayedText, setDisplayedText] = useState('');
const [charIndex, setCharIndex] = useState(0); const [charIndex, setCharIndex] = useState(0);
const sentences = textContent ? textContent.split('\n') : [];
const isTyping = useRef(false); const isTyping = useRef(false);
// Flatten textContent to a string, preserving \n
const fullText = textContent || '';
useEffect(() => { useEffect(() => {
if (!fullText) return; if (!textContent) return;
if (!isTyping.current) { if (!isTyping.current) {
isTyping.current = true; isTyping.current = true;
} }
if (charIndex > fullText.length) { if (sentenceIndex >= sentences.length) {
// Reset to start from the beginning // Reset to start from the beginning
setSentenceIndex(0);
setCharIndex(0); setCharIndex(0);
setDisplayedText(''); setText('');
return; return;
} }
if (charIndex < fullText.length) { const currentSentence = sentences[sentenceIndex];
if (charIndex < currentSentence.length) {
const timeout = setTimeout(() => { const timeout = setTimeout(() => {
setDisplayedText(fullText.slice(0, charIndex + 1)); setText((prevText) => prevText + currentSentence[charIndex]);
setCharIndex((prevCharIndex) => prevCharIndex + 1); setCharIndex((prevCharIndex) => prevCharIndex + 1);
}, fullText[charIndex] === '\n' ? typingSpeed : typingSpeed); }, typingSpeed);
return () => clearTimeout(timeout); return () => clearTimeout(timeout);
} else { } else {
// Wait a bit, then restart // Wait a bit, then go to the next sentence
const timeout = setTimeout(() => { const timeout = setTimeout(() => {
setSentenceIndex((prev) => prev + 1);
setCharIndex(0); setCharIndex(0);
setDisplayedText('');
}, pauseBetweenSentences); }, pauseBetweenSentences);
return () => clearTimeout(timeout); return () => clearTimeout(timeout);
} }
// eslint-disable-next-line }, [charIndex, sentenceIndex, sentences, typingSpeed, pauseBetweenSentences, textContent]);
}, [charIndex, fullText, typingSpeed, pauseBetweenSentences]);
return ( return (
<div <textarea
value={text}
readOnly
rows={10}
cols={5}
style={{ style={{
width: typeof width === 'number' ? `${width}px` : width, width: typeof width === 'number' ? `${width}px` : width,
height: height, height: height,
@ -61,12 +60,8 @@ const TypewriterTextarea = ({
resize: 'none', resize: 'none',
whiteSpace: 'pre-wrap', whiteSpace: 'pre-wrap',
color: 'black', color: 'black',
overflow: 'auto',
...style,
}} }}
> />
{displayedText}
</div>
); );
}; };

Two binary image files changed (previews not shown); sizes before: 373 KiB and 67 KiB.

123
pom.xml
View File

@ -17,9 +17,9 @@
<project.build.outputTimestamp>${git.commit.time} <project.build.outputTimestamp>${git.commit.time}
</project.build.outputTimestamp><!-- populated via git-commit-id-plugin --> </project.build.outputTimestamp><!-- populated via git-commit-id-plugin -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven-surefire-plugin.version>3.5.4</maven-surefire-plugin.version> <maven-surefire-plugin.version>3.0.0-M5</maven-surefire-plugin.version>
<maven-failsafe-plugin.version>3.5.4</maven-failsafe-plugin.version> <maven-failsafe-plugin.version>3.0.0-M5</maven-failsafe-plugin.version>
<lombok.version>1.18.40</lombok.version> <lombok.version>1.18.38</lombok.version>
</properties> </properties>
<developers> <developers>
@ -76,7 +76,7 @@
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId> <artifactId>maven-javadoc-plugin</artifactId>
<version>3.12.0</version> <version>3.11.2</version>
<configuration> <configuration>
<!-- to disable the "missing" warnings. Remove the doclint to enable warnings--> <!-- to disable the "missing" warnings. Remove the doclint to enable warnings-->
<doclint>all,-missing</doclint> <doclint>all,-missing</doclint>
@ -135,7 +135,7 @@
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId> <artifactId>maven-gpg-plugin</artifactId>
<version>3.2.8</version> <version>1.5</version>
<executions> <executions>
<execution> <execution>
<id>sign-artifacts</id> <id>sign-artifacts</id>
@ -150,7 +150,7 @@
<plugin> <plugin>
<groupId>io.github.git-commit-id</groupId> <groupId>io.github.git-commit-id</groupId>
<artifactId>git-commit-id-maven-plugin</artifactId> <artifactId>git-commit-id-maven-plugin</artifactId>
<version>9.0.2</version> <version>9.0.1</version>
<executions> <executions>
<execution> <execution>
<goals> <goals>
@ -163,76 +163,13 @@
<dateFormatTimeZone>Etc/UTC</dateFormatTimeZone> <dateFormatTimeZone>Etc/UTC</dateFormatTimeZone>
</configuration> </configuration>
</plugin> </plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<formats>
<!-- you can define as many formats as you want, each is independent -->
<format>
<!-- define the files to apply to -->
<includes>
<include>.gitattributes</include>
<include>.gitignore</include>
</includes>
<!-- define the steps to apply to those files -->
<trimTrailingWhitespace/>
<endWithNewline/>
<indent>
<tabs>true</tabs>
<spacesPerTab>4</spacesPerTab>
</indent>
</format>
</formats>
<!-- define a language-specific format -->
<java>
<!-- no need to specify files, inferred automatically, but you can if you want -->
<!-- apply a specific flavor of google-java-format and reflow long strings -->
<googleJavaFormat>
<version>1.28.0</version>
<style>AOSP</style>
<reflowLongStrings>true</reflowLongStrings>
<formatJavadoc>false</formatJavadoc>
</googleJavaFormat>
<!-- make sure every file has the following copyright header.
optionally, Spotless can set copyright years by digging
through git history (see "license" section below) -->
<licenseHeader>
<content>
<![CDATA[
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) $YEAR Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
]]>
</content> <!-- or <file>${project.basedir}/license-header</file> -->
</licenseHeader>
</java>
</configuration>
<executions>
<execution>
<goals>
<goal>check</goal>
</goals>
<phase>compile</phase>
</execution>
</executions>
</plugin>
</plugins> </plugins>
<pluginManagement> <pluginManagement>
<plugins> <plugins>
<plugin> <plugin>
<artifactId>maven-compiler-plugin</artifactId> <artifactId>maven-compiler-plugin</artifactId>
<version>3.14.1</version> <version>3.14.0</version>
</plugin> </plugin>
<plugin> <plugin>
<artifactId>maven-jar-plugin</artifactId> <artifactId>maven-jar-plugin</artifactId>
@ -252,46 +189,45 @@
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
-    <version>1.21.2</version>
+    <version>1.18.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
-    <version>2.20.0</version>
+    <version>2.17.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jsr310</artifactId>
-    <version>2.20.0</version>
+    <version>2.17.1</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
-    <version>1.5.18</version>
+    <version>1.5.6</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
-    <version>2.0.17</version>
+    <version>2.0.9</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
-    <version>5.13.4</version>
+    <version>5.10.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
-    <version>5.20.0</version>
+    <version>4.1.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
-    <version>20250517</version>
+    <version>20240205</version>
<scope>test</scope>
</dependency>
@ -304,22 +240,9 @@
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>nginx</artifactId>
-    <version>1.21.3</version>
+    <version>1.20.0</version>
<scope>test</scope>
</dependency>
-<!-- Prometheus metrics dependencies -->
-<dependency>
-    <groupId>io.prometheus</groupId>
-    <artifactId>simpleclient</artifactId>
-    <version>0.16.0</version>
-</dependency>
-<dependency>
-    <groupId>com.google.guava</groupId>
-    <artifactId>guava</artifactId>
-    <version>33.5.0-jre</version>
-</dependency>
</dependencies>
<distributionManagement>
@ -345,7 +268,7 @@
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
-    <version>0.8.0</version>
+    <version>0.5.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>mvn-repo-id</publishingServerId>
@ -371,7 +294,7 @@
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
-    <version>0.8.13</version>
+    <version>0.8.11</version>
<executions>
<execution>
<goals>
@ -390,7 +313,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
-    <version>3.2.8</version>
+    <version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>
@ -420,7 +343,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
-    <version>3.2.8</version>
+    <version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>
@ -449,7 +372,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
-    <version>3.2.8</version>
+    <version>3.1.0</version>
<executions>
<execution>
<id>sign-artifacts</id>
@ -471,7 +394,7 @@
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
-    <version>1.7.0</version>
+    <version>1.6.13</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
@ -482,7 +405,7 @@
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
-    <version>0.8.13</version>
+    <version>0.8.7</version>
<executions>
<execution>
<goals>

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,8 @@
package io.github.ollama4j.exceptions;
public class OllamaBaseException extends Exception {
public OllamaBaseException(String s) {
super(s);
}
}
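For orientation, a minimal sketch of how this checked exception surfaces in client code; the message text is illustrative, and the throw merely stands in for a failing ollama4j API call:

import io.github.ollama4j.exceptions.OllamaBaseException;

public class ExceptionDemo {
    public static void main(String[] args) {
        try {
            // stand-in for an ollama4j call that declares OllamaBaseException
            throw new OllamaBaseException("model not found on the server");
        } catch (OllamaBaseException e) {
            System.err.println("Ollama call failed: " + e.getMessage());
        }
    }
}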


@ -1,20 +0,0 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.exceptions;
public class OllamaException extends Exception {
public OllamaException(String message) {
super(message);
}
public OllamaException(String message, Exception exception) {
super(message, exception);
}
}


@ -1,11 +1,3 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.exceptions;
public class RoleNotFoundException extends Exception {


@ -1,11 +1,3 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.exceptions;
public class ToolInvocationException extends Exception {


@ -1,11 +1,3 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.exceptions;
public class ToolNotFoundException extends Exception {


@ -1,18 +0,0 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.impl;
import io.github.ollama4j.models.chat.OllamaChatStreamObserver;
public final class ConsoleOutputChatTokenHandler extends OllamaChatStreamObserver {
public ConsoleOutputChatTokenHandler() {
setThinkingStreamHandler(new ConsoleOutputGenerateTokenHandler());
setResponseStreamHandler(new ConsoleOutputGenerateTokenHandler());
}
}


@ -1,18 +0,0 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.impl;
import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler;
public class ConsoleOutputGenerateTokenHandler implements OllamaGenerateTokenHandler {
@Override
public void accept(String message) {
System.out.print(message);
}
}
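A minimal sketch of exercising this handler directly; in practice the library invokes it with tokens from a streaming generate call, so the literal strings here are only a stand-in:

import io.github.ollama4j.impl.ConsoleOutputGenerateTokenHandler;
import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler;

public class ConsoleHandlerDemo {
    public static void main(String[] args) {
        OllamaGenerateTokenHandler handler = new ConsoleOutputGenerateTokenHandler();
        // tokens normally arrive incrementally from the server
        handler.accept("Hello, ");
        handler.accept("world!");
        System.out.println();
    }
}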


@ -0,0 +1,10 @@
package io.github.ollama4j.impl;
import io.github.ollama4j.models.generate.OllamaStreamHandler;
public class ConsoleOutputStreamHandler implements OllamaStreamHandler {
@Override
public void accept(String message) {
System.out.print(message);
}
}


@ -1,129 +0,0 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.metrics;
import com.google.common.base.Throwables;
import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
import java.util.Map;
public class MetricsRecorder {
// Corrected: Removed duplicate "format" label and ensured label count matches usage
private static final Counter requests =
Counter.build()
.name("ollama_api_requests_total")
.help("Total requests to Ollama API")
.labelNames(
"endpoint",
"model",
"raw",
"streaming",
"thinking",
"http_status",
"options",
"format")
.register();
private static final Histogram requestLatency =
Histogram.build()
.name("ollama_api_request_duration_seconds")
.help("Request latency in seconds")
.labelNames(
"endpoint",
"model",
"raw",
"streaming",
"thinking",
"http_status",
"options",
"format")
.register();
private static final Histogram responseSize =
Histogram.build()
.name("ollama_api_response_size_bytes")
.help("Response size in bytes")
.labelNames("endpoint", "model", "options")
.register();
public static void record(
String endpoint,
String model,
boolean raw,
boolean thinking,
boolean streaming,
Map<String, Object> options,
Object format,
long startTime,
int responseHttpStatus,
Object response) {
long endTime = System.currentTimeMillis();
String httpStatus = String.valueOf(responseHttpStatus);
String formatString = "";
if (format instanceof String) {
formatString = (String) format;
} else if (format instanceof Map) {
formatString = mapToString((Map<String, Object>) format);
} else if (format != null) {
formatString = format.toString();
}
// Ensure the number of labels matches the labelNames above (8 labels)
requests.labels(
endpoint,
safe(model),
String.valueOf(raw),
String.valueOf(streaming),
String.valueOf(thinking),
httpStatus,
safe(mapToString(options)),
safe(formatString))
.inc();
double durationSeconds = (endTime - startTime) / 1000.0;
// Ensure the number of labels matches the labelNames above (8 labels)
requestLatency
.labels(
endpoint,
safe(model),
String.valueOf(raw),
String.valueOf(streaming),
String.valueOf(thinking),
httpStatus,
safe(mapToString(options)),
safe(formatString))
.observe(durationSeconds);
// Record response size (only if response is a string or json-like object)
if (response != null) {
if (response instanceof Exception) {
response = Throwables.getStackTraceAsString((Throwable) response);
}
int size = response.toString().length();
responseSize.labels(endpoint, safe(model), safe(mapToString(options))).observe(size);
}
}
// Utility method to convert options Map to string (you can adjust this for more detailed
// representation)
private static String mapToString(Map<String, Object> map) {
if (map == null || map.isEmpty()) {
return "none";
}
// Convert the map to a string (can be customized to fit the use case)
return map.toString();
}
private static String safe(String value) {
return (value == null || value.isEmpty()) ? "none" : value;
}
}
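A hedged sketch of a call site for MetricsRecorder.record(...) matching the ten-parameter signature above; the endpoint, model name, options, and response payload are all made-up values, since the real call sites are not part of this diff:

import io.github.ollama4j.metrics.MetricsRecorder;
import java.util.Map;

public class MetricsDemo {
    public static void main(String[] args) throws InterruptedException {
        long start = System.currentTimeMillis();
        Thread.sleep(50); // stand-in for the actual HTTP round-trip
        MetricsRecorder.record(
                "/api/generate",             // endpoint
                "llama3",                    // model (illustrative)
                false,                       // raw
                false,                       // thinking
                false,                       // streaming
                Map.of("temperature", 0.7),  // options
                "json",                      // format
                start,
                200,                         // HTTP status
                "{\"response\": \"...\"}");  // response payload
    }
}

The counters and histograms register themselves with the Prometheus simpleclient default registry, so any exporter scraping that registry would pick these samples up.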


@ -1,31 +1,21 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
-import static io.github.ollama4j.utils.Utils.getObjectMapper;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import io.github.ollama4j.utils.FileToBase64Serializer;
-import java.util.List;
import lombok.*;
+import java.util.List;
+import static io.github.ollama4j.utils.Utils.getObjectMapper;
/**
 * Defines a single Message to be used inside a chat request against the ollama /api/chat endpoint.
 *
- * @see <a
- *     href="https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate
- *     chat completion</a>
+ * @see <a href="https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Generate chat completion</a>
 */
-@SuppressWarnings("NullableProblems")
@Data
@AllArgsConstructor
@RequiredArgsConstructor
@ -33,11 +23,11 @@ import lombok.*;
@JsonIgnoreProperties(ignoreUnknown = true)
public class OllamaChatMessage {
-    @NonNull private OllamaChatMessageRole role;
-    @JsonProperty("content")
-    @NonNull
-    private String response;
+    @NonNull
+    private OllamaChatMessageRole role;
+    @NonNull
+    private String content;
    private String thinking;


@ -1,18 +1,11 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
import com.fasterxml.jackson.annotation.JsonValue;
import io.github.ollama4j.exceptions.RoleNotFoundException;
+import lombok.Getter;
import java.util.ArrayList;
import java.util.List;
-import lombok.Getter;
/**
 * Defines the possible Chat Message roles.
@ -26,7 +19,8 @@ public class OllamaChatMessageRole {
    public static final OllamaChatMessageRole ASSISTANT = new OllamaChatMessageRole("assistant");
    public static final OllamaChatMessageRole TOOL = new OllamaChatMessageRole("tool");
-    @JsonValue private final String roleName;
+    @JsonValue
+    private final String roleName;
    private OllamaChatMessageRole(String roleName) {
        this.roleName = roleName;
@ -34,6 +28,8 @@ public class OllamaChatMessageRole {
    }
    public static OllamaChatMessageRole newCustomRole(String roleName) {
+        // OllamaChatMessageRole customRole = new OllamaChatMessageRole(roleName);
+        // roles.add(customRole);
        return new OllamaChatMessageRole(roleName);
    }
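A small sketch of this role API; it assumes a class-level lombok @Getter generating getRoleName(), consistent with the Getter import visible in this hunk:

import io.github.ollama4j.models.chat.OllamaChatMessageRole;

public class RoleDemo {
    public static void main(String[] args) {
        // built-in role visible in this hunk
        System.out.println(OllamaChatMessageRole.ASSISTANT.getRoleName());
        // ad-hoc role; note it is no longer registered in any shared role list
        OllamaChatMessageRole reviewer = OllamaChatMessageRole.newCustomRole("reviewer");
        System.out.println(reviewer.getRoleName());
    }
}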


@ -1,24 +1,13 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
import io.github.ollama4j.models.request.OllamaCommonRequest;
import io.github.ollama4j.tools.Tools;
import io.github.ollama4j.utils.OllamaRequestBody;
-import io.github.ollama4j.utils.Options;
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
import lombok.Getter;
import lombok.Setter;
+import java.util.List;
/**
 * Defines a Request to use against the ollama /api/chat endpoint.
 *
@ -30,28 +19,14 @@ import lombok.Setter;
@Setter
public class OllamaChatRequest extends OllamaCommonRequest implements OllamaRequestBody {
-    private List<OllamaChatMessage> messages = Collections.emptyList();
+    private List<OllamaChatMessage> messages;
-    private List<Tools.Tool> tools;
+    private List<Tools.PromptFuncDefinition> tools;
    private boolean think;
-    /**
-     * Controls whether tools are automatically executed.
-     *
-     * <p>
-     * If set to {@code true} (the default), tools will be automatically used/applied by the
-     * library. If set to {@code false}, tool calls will be returned to the client for manual
-     * handling.
-     *
-     * <p>
-     * Disabling this should be an explicit operation.
-     */
-    private boolean useTools = true;
-    public OllamaChatRequest() {}
+    public OllamaChatRequest() {
+    }
    public OllamaChatRequest(String model, boolean think, List<OllamaChatMessage> messages) {
        this.model = model;
@ -64,116 +39,8 @@ public class OllamaChatRequest extends OllamaCommonRequest implements OllamaRequ
        if (!(o instanceof OllamaChatRequest)) {
            return false;
        }
        return this.toString().equals(o.toString());
    }
-    // --- Builder-like fluent API methods ---
-    public static OllamaChatRequest builder() {
-        OllamaChatRequest req = new OllamaChatRequest();
-        req.setMessages(new ArrayList<>());
-        return req;
-    }
-    public OllamaChatRequest withModel(String model) {
-        this.setModel(model);
-        return this;
-    }
-    public OllamaChatRequest withMessage(OllamaChatMessageRole role, String content) {
-        return withMessage(role, content, Collections.emptyList());
-    }
-    public OllamaChatRequest withMessage(
-            OllamaChatMessageRole role, String content, List<OllamaChatToolCalls> toolCalls) {
-        if (this.messages == null || this.messages == Collections.EMPTY_LIST) {
-            this.messages = new ArrayList<>();
-        }
-        this.messages.add(new OllamaChatMessage(role, content, null, toolCalls, null));
-        return this;
-    }
-    public OllamaChatRequest withMessage(
-            OllamaChatMessageRole role,
-            String content,
-            List<OllamaChatToolCalls> toolCalls,
-            List<File> images) {
-        if (this.messages == null || this.messages == Collections.EMPTY_LIST) {
-            this.messages = new ArrayList<>();
-        }
-        List<byte[]> imagesAsBytes = new ArrayList<>();
-        if (images != null) {
-            for (File image : images) {
-                try {
-                    imagesAsBytes.add(java.nio.file.Files.readAllBytes(image.toPath()));
-                } catch (java.io.IOException e) {
-                    throw new RuntimeException(
-                            "Failed to read image file: " + image.getAbsolutePath(), e);
-                }
-            }
-        }
-        this.messages.add(new OllamaChatMessage(role, content, null, toolCalls, imagesAsBytes));
-        return this;
-    }
-    public OllamaChatRequest withMessages(List<OllamaChatMessage> messages) {
-        this.setMessages(messages);
-        return this;
-    }
-    public OllamaChatRequest withOptions(Options options) {
-        if (options != null) {
-            this.setOptions(options.getOptionsMap());
-        }
-        return this;
-    }
-    public OllamaChatRequest withGetJsonResponse() {
-        this.setFormat("json");
-        return this;
-    }
-    public OllamaChatRequest withTemplate(String template) {
-        this.setTemplate(template);
-        return this;
-    }
-    public OllamaChatRequest withStreaming() {
-        this.setStream(true);
-        return this;
-    }
-    public OllamaChatRequest withKeepAlive(String keepAlive) {
-        this.setKeepAlive(keepAlive);
-        return this;
-    }
-    public OllamaChatRequest withThinking(boolean think) {
-        this.setThink(think);
-        return this;
-    }
-    public OllamaChatRequest withUseTools(boolean useTools) {
-        this.setUseTools(useTools);
-        return this;
-    }
-    public OllamaChatRequest withTools(List<Tools.Tool> tools) {
-        this.setTools(tools);
-        return this;
-    }
-    public OllamaChatRequest build() {
-        return this;
-    }
-    public void reset() {
-        // Only clear the messages, keep model and think as is
-        if (this.messages == null || this.messages == Collections.EMPTY_LIST) {
-            this.messages = new ArrayList<>();
-        } else {
-            this.messages.clear();
-        }
-    }
}
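A usage sketch of the fluent API that main adds directly on the request class; the model name and prompts are illustrative, and the SYSTEM and USER constants are assumed to exist alongside the ASSISTANT and TOOL constants shown earlier:

import io.github.ollama4j.models.chat.OllamaChatMessageRole;
import io.github.ollama4j.models.chat.OllamaChatRequest;

public class ChatRequestDemo {
    public static void main(String[] args) {
        OllamaChatRequest request = OllamaChatRequest.builder()
                .withModel("llama3") // illustrative model name
                .withMessage(OllamaChatMessageRole.SYSTEM, "You are a terse assistant.")
                .withMessage(OllamaChatMessageRole.USER, "Why is the sky blue?")
                .withThinking(false)
                .build();
        System.out.println(request.getMessages().size()); // 2
    }
}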


@ -0,0 +1,121 @@
package io.github.ollama4j.models.chat;
import io.github.ollama4j.utils.Options;
import io.github.ollama4j.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
* Helper class for creating {@link OllamaChatRequest} objects using the builder-pattern.
*/
public class OllamaChatRequestBuilder {
private static final Logger LOG = LoggerFactory.getLogger(OllamaChatRequestBuilder.class);
private OllamaChatRequestBuilder(String model, List<OllamaChatMessage> messages) {
request = new OllamaChatRequest(model, false, messages);
}
private OllamaChatRequest request;
public static OllamaChatRequestBuilder getInstance(String model) {
return new OllamaChatRequestBuilder(model, new ArrayList<>());
}
public OllamaChatRequest build() {
return request;
}
public void reset() {
request = new OllamaChatRequest(request.getModel(), request.isThink(), new ArrayList<>());
}
public OllamaChatRequestBuilder withMessage(OllamaChatMessageRole role, String content) {
return withMessage(role, content, Collections.emptyList());
}
public OllamaChatRequestBuilder withMessage(OllamaChatMessageRole role, String content, List<OllamaChatToolCalls> toolCalls) {
List<OllamaChatMessage> messages = this.request.getMessages();
messages.add(new OllamaChatMessage(role, content, null, toolCalls, null));
return this;
}
public OllamaChatRequestBuilder withMessage(OllamaChatMessageRole role, String content, List<OllamaChatToolCalls> toolCalls, List<File> images) {
List<OllamaChatMessage> messages = this.request.getMessages();
List<byte[]> binaryImages = images.stream().map(file -> {
try {
return Files.readAllBytes(file.toPath());
} catch (IOException e) {
LOG.warn("File '{}' could not be accessed, will not add to message!", file.toPath(), e);
return new byte[0];
}
}).collect(Collectors.toList());
messages.add(new OllamaChatMessage(role, content, null, toolCalls, binaryImages));
return this;
}
public OllamaChatRequestBuilder withMessage(OllamaChatMessageRole role, String content, List<OllamaChatToolCalls> toolCalls, String... imageUrls) {
List<OllamaChatMessage> messages = this.request.getMessages();
List<byte[]> binaryImages = null;
if (imageUrls.length > 0) {
binaryImages = new ArrayList<>();
for (String imageUrl : imageUrls) {
try {
binaryImages.add(Utils.loadImageBytesFromUrl(imageUrl));
} catch (URISyntaxException e) {
LOG.warn("URL '{}' could not be accessed, will not add to message!", imageUrl, e);
} catch (IOException e) {
LOG.warn("Content of URL '{}' could not be read, will not add to message!", imageUrl, e);
}
}
}
messages.add(new OllamaChatMessage(role, content, null, toolCalls, binaryImages));
return this;
}
public OllamaChatRequestBuilder withMessages(List<OllamaChatMessage> messages) {
return new OllamaChatRequestBuilder(request.getModel(), messages);
}
public OllamaChatRequestBuilder withOptions(Options options) {
this.request.setOptions(options.getOptionsMap());
return this;
}
public OllamaChatRequestBuilder withGetJsonResponse() {
this.request.setReturnFormatJson(true);
return this;
}
public OllamaChatRequestBuilder withTemplate(String template) {
this.request.setTemplate(template);
return this;
}
public OllamaChatRequestBuilder withStreaming() {
this.request.setStream(true);
return this;
}
public OllamaChatRequestBuilder withKeepAlive(String keepAlive) {
this.request.setKeepAlive(keepAlive);
return this;
}
public OllamaChatRequestBuilder withThinking(boolean think) {
this.request.setThink(think);
return this;
}
}
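For comparison, the 1.1.0 equivalent goes through this standalone builder class; the model name and message are illustrative, and USER is assumed available as in the sketch above:

import io.github.ollama4j.models.chat.OllamaChatMessageRole;
import io.github.ollama4j.models.chat.OllamaChatRequest;
import io.github.ollama4j.models.chat.OllamaChatRequestBuilder;

public class ChatBuilderDemo {
    public static void main(String[] args) {
        OllamaChatRequest request = OllamaChatRequestBuilder.getInstance("llama3")
                .withMessage(OllamaChatMessageRole.USER, "Why is the sky blue?")
                .withThinking(false)
                .build();
        System.out.println(request);
    }
}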


@ -1,25 +1,18 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
-import java.util.List;
import lombok.Data;
+import java.util.List;
@Data
-@JsonIgnoreProperties(ignoreUnknown = true)
public class OllamaChatResponseModel {
    private String model;
    private @JsonProperty("created_at") String createdAt;
    private @JsonProperty("done_reason") String doneReason;
+    private OllamaChatMessage message;
    private boolean done;
+    private String error;
    private List<Integer> context;
    private @JsonProperty("total_duration") Long totalDuration;
    private @JsonProperty("load_duration") Long loadDuration;
@ -27,6 +20,4 @@ public class OllamaChatResponseModel {
    private @JsonProperty("eval_duration") Long evalDuration;
    private @JsonProperty("prompt_eval_count") Integer promptEvalCount;
    private @JsonProperty("eval_count") Integer evalCount;
-    private String error;
-    private OllamaChatMessage message;
}


@ -1,19 +1,12 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
-import static io.github.ollama4j.utils.Utils.getObjectMapper;
import com.fasterxml.jackson.core.JsonProcessingException;
-import java.util.List;
import lombok.Getter;
+import java.util.List;
+import static io.github.ollama4j.utils.Utils.getObjectMapper;
/**
 * Specific chat-API result that contains the chat history sent to the model and appends the answer as {@link OllamaChatResult} given by the
 * {@link OllamaChatMessageRole#ASSISTANT} role.
@ -25,8 +18,7 @@ public class OllamaChatResult {
    private final OllamaChatResponseModel responseModel;
-    public OllamaChatResult(
-            OllamaChatResponseModel responseModel, List<OllamaChatMessage> chatHistory) {
+    public OllamaChatResult(OllamaChatResponseModel responseModel, List<OllamaChatMessage> chatHistory) {
        this.chatHistory = chatHistory;
        this.responseModel = responseModel;
        appendAnswerToChatHistory(responseModel);
@ -44,4 +36,19 @@ public class OllamaChatResult {
            throw new RuntimeException(e);
        }
    }
+    @Deprecated
+    public String getResponse(){
+        return responseModel != null ? responseModel.getMessage().getContent() : "";
+    }
+    @Deprecated
+    public int getHttpStatusCode(){
+        return 200;
+    }
+    @Deprecated
+    public long getResponseTime(){
+        return responseModel != null ? responseModel.getTotalDuration() : 0L;
+    }
}


@ -1,24 +1,15 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
-import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler;
-import lombok.AllArgsConstructor;
-import lombok.NoArgsConstructor;
-import lombok.Setter;
+import io.github.ollama4j.models.generate.OllamaStreamHandler;
+import io.github.ollama4j.models.generate.OllamaTokenHandler;
+import lombok.RequiredArgsConstructor;
-@Setter
-@NoArgsConstructor
-@AllArgsConstructor
-public class OllamaChatStreamObserver implements OllamaChatTokenHandler {
-    private OllamaGenerateTokenHandler thinkingStreamHandler;
-    private OllamaGenerateTokenHandler responseStreamHandler;
+@RequiredArgsConstructor
+public class OllamaChatStreamObserver implements OllamaTokenHandler {
+    private final OllamaStreamHandler thinkingStreamHandler;
+    private final OllamaStreamHandler responseStreamHandler;
+    private String message = "";
    @Override
    public void accept(OllamaChatResponseModel token) {
@ -27,15 +18,34 @@ public class OllamaChatStreamObserver implements OllamaChatTokenHandler {
        }
        String thinking = token.getMessage().getThinking();
-        String response = token.getMessage().getResponse();
+        String content = token.getMessage().getContent();
        boolean hasThinking = thinking != null && !thinking.isEmpty();
-        boolean hasResponse = response != null && !response.isEmpty();
+        boolean hasContent = !content.isEmpty();
-        if (!hasResponse && hasThinking && thinkingStreamHandler != null) {
+        // if (hasThinking && !hasContent) {
+        ////     message += thinking;
+        //     message = thinking;
+        // } else {
+        ////     message += content;
+        //     message = content;
+        // }
+        //
+        // responseStreamHandler.accept(message);
+        if (!hasContent && hasThinking && thinkingStreamHandler != null) {
+            // message = message + thinking;
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
            thinkingStreamHandler.accept(thinking);
-        } else if (hasResponse) {
-            responseStreamHandler.accept(response);
+        } else if (hasContent && responseStreamHandler != null) {
+            // message = message + response;
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
+            responseStreamHandler.accept(content);
        }
    }
}
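Since OllamaStreamHandler is a single-method Consumer of String, the 1.1.0 observer can be wired with lambdas; a minimal sketch:

import io.github.ollama4j.models.chat.OllamaChatStreamObserver;
import io.github.ollama4j.models.generate.OllamaTokenHandler;

public class StreamWiringDemo {
    public static void main(String[] args) {
        // thinking tokens and answer tokens go to separate handlers
        OllamaTokenHandler handler = new OllamaChatStreamObserver(
                thinking -> System.out.print("[thinking] " + thinking),
                content -> System.out.print(content));
        // the handler would then be passed into a streaming chat call
    }
}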


@ -1,13 +0,0 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.models.chat;
import java.util.function.Consumer;
public interface OllamaChatTokenHandler extends Consumer<OllamaChatResponseModel> {}


@ -1,11 +1,3 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.chat;
import io.github.ollama4j.tools.OllamaToolCallsFunction;
@ -19,4 +11,6 @@ import lombok.NoArgsConstructor;
public class OllamaChatToolCalls {
    private OllamaToolCallsFunction function;
}


@ -1,14 +1,7 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
-package io.github.ollama4j.models.embed;
+package io.github.ollama4j.models.embeddings;
import io.github.ollama4j.utils.Options;
import java.util.List;
/**
@ -16,10 +9,10 @@ import java.util.List;
 */
public class OllamaEmbedRequestBuilder {
-    private final OllamaEmbedRequest request;
+    private final OllamaEmbedRequestModel request;
    private OllamaEmbedRequestBuilder(String model, List<String> input) {
-        this.request = new OllamaEmbedRequest(model, input);
+        this.request = new OllamaEmbedRequestModel(model,input);
    }
    public static OllamaEmbedRequestBuilder getInstance(String model, String... input){
@ -41,7 +34,7 @@ public class OllamaEmbedRequestBuilder {
        return this;
    }
-    public OllamaEmbedRequest build() {
+    public OllamaEmbedRequestModel build() {
        return this.request;
    }
}
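A short sketch of this builder on the main side (package io.github.ollama4j.models.embed, class OllamaEmbedRequest); the model name and input text are illustrative:

import io.github.ollama4j.models.embed.OllamaEmbedRequest;
import io.github.ollama4j.models.embed.OllamaEmbedRequestBuilder;

public class EmbedRequestDemo {
    public static void main(String[] args) {
        OllamaEmbedRequest request =
                OllamaEmbedRequestBuilder.getInstance("nomic-embed-text", "Hello world").build();
        System.out.println(request);
    }
}

On 1.1.0 the same code would instead use OllamaEmbedRequestModel under io.github.ollama4j.models.embeddings.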


@ -1,29 +1,26 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
-package io.github.ollama4j.models.embed;
+package io.github.ollama4j.models.embeddings;
-import static io.github.ollama4j.utils.Utils.getObjectMapper;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
import java.util.List;
import java.util.Map;
-import lombok.*;
+import static io.github.ollama4j.utils.Utils.getObjectMapper;
-@SuppressWarnings("NullableProblems")
@Data
@RequiredArgsConstructor
@NoArgsConstructor
-public class OllamaEmbedRequest {
-    @NonNull private String model;
-    @NonNull private List<String> input;
+public class OllamaEmbedRequestModel {
+    @NonNull
+    private String model;
+    @NonNull
+    private List<String> input;
    private Map<String, Object> options;


@ -1,20 +1,13 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
-package io.github.ollama4j.models.embed;
+package io.github.ollama4j.models.embeddings;
import com.fasterxml.jackson.annotation.JsonProperty;
-import java.util.List;
import lombok.Data;
+import java.util.List;
@SuppressWarnings("unused")
@Data
-public class OllamaEmbedResult {
+public class OllamaEmbedResponseModel {
    @JsonProperty("model")
    private String model;


@ -0,0 +1,14 @@
package io.github.ollama4j.models.embeddings;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import java.util.List;
@SuppressWarnings("unused")
@Data
@Deprecated(since="1.0.90")
public class OllamaEmbeddingResponseModel {
@JsonProperty("embedding")
private List<Double> embedding;
}


@ -0,0 +1,32 @@
package io.github.ollama4j.models.embeddings;
import io.github.ollama4j.utils.Options;
@Deprecated(since="1.0.90")
public class OllamaEmbeddingsRequestBuilder {
private OllamaEmbeddingsRequestBuilder(String model, String prompt){
request = new OllamaEmbeddingsRequestModel(model, prompt);
}
private OllamaEmbeddingsRequestModel request;
public static OllamaEmbeddingsRequestBuilder getInstance(String model, String prompt){
return new OllamaEmbeddingsRequestBuilder(model, prompt);
}
public OllamaEmbeddingsRequestModel build(){
return request;
}
public OllamaEmbeddingsRequestBuilder withOptions(Options options){
this.request.setOptions(options.getOptionsMap());
return this;
}
public OllamaEmbeddingsRequestBuilder withKeepAlive(String keepAlive){
this.request.setKeepAlive(keepAlive);
return this;
}
}


@ -0,0 +1,36 @@
package io.github.ollama4j.models.embeddings;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import java.util.Map;
import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@RequiredArgsConstructor
@NoArgsConstructor
@Deprecated(since="1.0.90")
public class OllamaEmbeddingsRequestModel {
@NonNull
private String model;
@NonNull
private String prompt;
protected Map<String, Object> options;
@JsonProperty(value = "keep_alive")
private String keepAlive;
@Override
public String toString() {
try {
return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
}


@ -1,41 +1,27 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.generate;
import io.github.ollama4j.models.request.OllamaCommonRequest;
-import io.github.ollama4j.tools.Tools;
import io.github.ollama4j.utils.OllamaRequestBody;
-import io.github.ollama4j.utils.Options;
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.util.ArrayList;
-import java.util.Base64;
-import java.util.List;
-import java.util.Map;
import lombok.Getter;
import lombok.Setter;
+import java.util.List;
@Getter
@Setter
public class OllamaGenerateRequest extends OllamaCommonRequest implements OllamaRequestBody{
    private String prompt;
    private List<String> images;
    private String system;
    private String context;
    private boolean raw;
    private boolean think;
-    private boolean useTools;
-    private List<Tools.Tool> tools;
-    public OllamaGenerateRequest() {}
+    public OllamaGenerateRequest() {
+    }
    public OllamaGenerateRequest(String model, String prompt) {
        this.model = model;
@ -48,105 +34,13 @@ public class OllamaGenerateRequest extends OllamaCommonRequest implements Ollama
        this.images = images;
    }
-    // --- Builder-style methods ---
-    public static OllamaGenerateRequest builder() {
-        return new OllamaGenerateRequest();
-    }
-    public OllamaGenerateRequest withPrompt(String prompt) {
-        this.setPrompt(prompt);
-        return this;
-    }
-    public OllamaGenerateRequest withTools(List<Tools.Tool> tools) {
-        this.setTools(tools);
-        return this;
-    }
-    public OllamaGenerateRequest withModel(String model) {
-        this.setModel(model);
-        return this;
-    }
-    public OllamaGenerateRequest withGetJsonResponse() {
-        this.setFormat("json");
-        return this;
-    }
-    public OllamaGenerateRequest withOptions(Options options) {
-        this.setOptions(options.getOptionsMap());
-        return this;
-    }
-    public OllamaGenerateRequest withTemplate(String template) {
-        this.setTemplate(template);
-        return this;
-    }
-    public OllamaGenerateRequest withStreaming(boolean streaming) {
-        this.setStream(streaming);
-        return this;
-    }
-    public OllamaGenerateRequest withKeepAlive(String keepAlive) {
-        this.setKeepAlive(keepAlive);
-        return this;
-    }
-    public OllamaGenerateRequest withRaw(boolean raw) {
-        this.setRaw(raw);
-        return this;
-    }
-    public OllamaGenerateRequest withThink(boolean think) {
-        this.setThink(think);
-        return this;
-    }
-    public OllamaGenerateRequest withUseTools(boolean useTools) {
-        this.setUseTools(useTools);
-        return this;
-    }
-    public OllamaGenerateRequest withFormat(Map<String, Object> format) {
-        this.setFormat(format);
-        return this;
-    }
-    public OllamaGenerateRequest withSystem(String system) {
-        this.setSystem(system);
-        return this;
-    }
-    public OllamaGenerateRequest withContext(String context) {
-        this.setContext(context);
-        return this;
-    }
-    public OllamaGenerateRequest withImagesBase64(List<String> images) {
-        this.setImages(images);
-        return this;
-    }
-    public OllamaGenerateRequest withImages(List<File> imageFiles) throws IOException {
-        List<String> images = new ArrayList<>();
-        for (File imageFile : imageFiles) {
-            images.add(Base64.getEncoder().encodeToString(Files.readAllBytes(imageFile.toPath())));
-        }
-        this.setImages(images);
-        return this;
-    }
-    public OllamaGenerateRequest build() {
-        return this;
-    }
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof OllamaGenerateRequest)) {
            return false;
        }
        return this.toString().equals(o.toString());
    }
}
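A usage sketch of the builder-style methods main folds into the request class itself; the model and prompt are illustrative:

import io.github.ollama4j.models.generate.OllamaGenerateRequest;

public class GenerateRequestDemo {
    public static void main(String[] args) {
        OllamaGenerateRequest request = OllamaGenerateRequest.builder()
                .withModel("llama3") // illustrative model name
                .withPrompt("Why is the sky blue?")
                .withThink(false)
                .withStreaming(false)
                .build();
        System.out.println(request);
    }
}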


@ -0,0 +1,55 @@
package io.github.ollama4j.models.generate;
import io.github.ollama4j.utils.Options;
/**
* Helper class for creating {@link OllamaGenerateRequest}
* objects using the builder-pattern.
*/
public class OllamaGenerateRequestBuilder {
private OllamaGenerateRequestBuilder(String model, String prompt){
request = new OllamaGenerateRequest(model, prompt);
}
private OllamaGenerateRequest request;
public static OllamaGenerateRequestBuilder getInstance(String model){
return new OllamaGenerateRequestBuilder(model,"");
}
public OllamaGenerateRequest build(){
return request;
}
public OllamaGenerateRequestBuilder withPrompt(String prompt){
request.setPrompt(prompt);
return this;
}
public OllamaGenerateRequestBuilder withGetJsonResponse(){
this.request.setReturnFormatJson(true);
return this;
}
public OllamaGenerateRequestBuilder withOptions(Options options){
this.request.setOptions(options.getOptionsMap());
return this;
}
public OllamaGenerateRequestBuilder withTemplate(String template){
this.request.setTemplate(template);
return this;
}
public OllamaGenerateRequestBuilder withStreaming(){
this.request.setStream(true);
return this;
}
public OllamaGenerateRequestBuilder withKeepAlive(String keepAlive){
this.request.setKeepAlive(keepAlive);
return this;
}
}
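The 1.1.0 counterpart via this standalone builder, for contrast (again with illustrative values):

import io.github.ollama4j.models.generate.OllamaGenerateRequest;
import io.github.ollama4j.models.generate.OllamaGenerateRequestBuilder;

public class GenerateBuilderDemo {
    public static void main(String[] args) {
        OllamaGenerateRequest request = OllamaGenerateRequestBuilder.getInstance("llama3")
                .withPrompt("Why is the sky blue?")
                .withStreaming()
                .build();
        System.out.println(request);
    }
}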


@ -1,32 +1,25 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.generate;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
-import java.util.List;
import lombok.Data;
+import java.util.List;
@Data
@JsonIgnoreProperties(ignoreUnknown = true)
public class OllamaGenerateResponseModel {
    private String model;
    private @JsonProperty("created_at") String createdAt;
-    private @JsonProperty("done_reason") String doneReason;
+    private String response;
+    private String thinking;
    private boolean done;
+    private @JsonProperty("done_reason") String doneReason;
    private List<Integer> context;
    private @JsonProperty("total_duration") Long totalDuration;
    private @JsonProperty("load_duration") Long loadDuration;
-    private @JsonProperty("prompt_eval_duration") Long promptEvalDuration;
-    private @JsonProperty("eval_duration") Long evalDuration;
    private @JsonProperty("prompt_eval_count") Integer promptEvalCount;
+    private @JsonProperty("prompt_eval_duration") Long promptEvalDuration;
    private @JsonProperty("eval_count") Integer evalCount;
-    private String response;
-    private String thinking;
+    private @JsonProperty("eval_duration") Long evalDuration;
}


@ -1,29 +1,20 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.generate;
import java.util.ArrayList;
import java.util.List;
-import lombok.Getter;
-@Getter
public class OllamaGenerateStreamObserver {
-    private final OllamaGenerateTokenHandler thinkingStreamHandler;
-    private final OllamaGenerateTokenHandler responseStreamHandler;
+    private final OllamaStreamHandler thinkingStreamHandler;
+    private final OllamaStreamHandler responseStreamHandler;
    private final List<OllamaGenerateResponseModel> responseParts = new ArrayList<>();
-    public OllamaGenerateStreamObserver(
-            OllamaGenerateTokenHandler thinkingStreamHandler,
-            OllamaGenerateTokenHandler responseStreamHandler) {
-        this.thinkingStreamHandler = thinkingStreamHandler;
-        this.responseStreamHandler = responseStreamHandler;
+    private String message = "";
+    public OllamaGenerateStreamObserver(OllamaStreamHandler thinkingStreamHandler, OllamaStreamHandler responseStreamHandler) {
+        this.responseStreamHandler = responseStreamHandler;
+        this.thinkingStreamHandler = thinkingStreamHandler;
    }
    public void notify(OllamaGenerateResponseModel currentResponsePart) {
@ -39,8 +30,16 @@ public class OllamaGenerateStreamObserver {
        boolean hasThinking = thinking != null && !thinking.isEmpty();
        if (!hasResponse && hasThinking && thinkingStreamHandler != null) {
+            // message = message + thinking;
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
            thinkingStreamHandler.accept(thinking);
        } else if (hasResponse && responseStreamHandler != null) {
+            // message = message + response;
+            // use only new tokens received, instead of appending the tokens to the previous
+            // ones and sending the full string again
            responseStreamHandler.accept(response);
        }
    }


@ -1,15 +0,0 @@
/*
* Ollama4j - Java library for interacting with Ollama server.
* Copyright (c) 2025 Amith Koujalgi and contributors.
*
* Licensed under the MIT License (the "License");
* you may not use this file except in compliance with the License.
*
*/
package io.github.ollama4j.models.generate;
import java.util.function.Consumer;
public interface OllamaGenerateTokenHandler extends Consumer<String> {
void accept(String message);
}


@ -0,0 +1,7 @@
package io.github.ollama4j.models.generate;
import java.util.function.Consumer;
public interface OllamaStreamHandler extends Consumer<String> {
void accept(String message);
}


@ -0,0 +1,8 @@
package io.github.ollama4j.models.generate;
import io.github.ollama4j.models.chat.OllamaChatResponseModel;
import java.util.function.Consumer;
public interface OllamaTokenHandler extends Consumer<OllamaChatResponseModel> {
}


@ -1,29 +1,21 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.ps;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
-import java.util.List;
import lombok.Data;
import lombok.NoArgsConstructor;
+import java.util.List;
@Data
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
-public class ModelProcessesResult {
+public class ModelsProcessResponse {
    @JsonProperty("models")
    private List<ModelProcess> models;
    @Data
    @NoArgsConstructor
-    @JsonIgnoreProperties(ignoreUnknown = true)
    public static class ModelProcess {
        @JsonProperty("name")
        private String name;
@ -41,7 +33,7 @@ public class ModelProcessesResult {
        private ModelDetails details;
        @JsonProperty("expires_at")
-        private String expiresAt;
+        private String expiresAt; // Consider using LocalDateTime if you need to process date/time
        @JsonProperty("size_vram")
        private long sizeVram;
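Since both sides are plain Jackson-mapped POJOs, deserializing an /api/ps style payload is straightforward; a sketch against the main-branch class name, with a made-up JSON snippet:

import com.fasterxml.jackson.databind.ObjectMapper;
import io.github.ollama4j.models.ps.ModelProcessesResult;

public class PsParseDemo {
    public static void main(String[] args) throws Exception {
        String json = "{\"models\":[{\"name\":\"llama3:latest\",\"size_vram\":4096}]}";
        ModelProcessesResult result =
                new ObjectMapper().readValue(json, ModelProcessesResult.class);
        System.out.println(result.getModels().get(0).getName());
    }
}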


@ -1,11 +1,3 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.request;
public abstract class Auth {


@ -1,18 +1,11 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.request;
-import java.util.Base64;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
+import java.util.Base64;
@Data
@AllArgsConstructor
@EqualsAndHashCode(callSuper = false)


@ -1,11 +1,3 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.request;
import lombok.AllArgsConstructor;


@ -1,20 +1,11 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.request;
-import static io.github.ollama4j.utils.Utils.getObjectMapper;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.AllArgsConstructor;
import lombok.Data;
-@SuppressWarnings("SpellCheckingInspection")
+import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@AllArgsConstructor
public class CustomModelFileContentsRequest {


@ -1,19 +1,11 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.request;
-import static io.github.ollama4j.utils.Utils.getObjectMapper;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.AllArgsConstructor;
import lombok.Data;
+import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@AllArgsConstructor
public class CustomModelFilePathRequest {


@ -1,22 +1,16 @@
-/*
- * Ollama4j - Java library for interacting with Ollama server.
- * Copyright (c) 2025 Amith Koujalgi and contributors.
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with the License.
- *
- */
package io.github.ollama4j.models.request;
-import static io.github.ollama4j.utils.Utils.getObjectMapper;
import com.fasterxml.jackson.core.JsonProcessingException;
-import java.util.List;
-import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
+import java.util.List;
+import java.util.Map;
+import static io.github.ollama4j.utils.Utils.getObjectMapper;
@Data
@AllArgsConstructor
@Builder
@ -26,7 +20,7 @@ public class CustomModelRequest {
    private Map<String, String> files;
    private Map<String, String> adapters;
    private String template;
-    private Object license;
+    private Object license; // Using Object to handle both String and List<String>
    private String system;
    private Map<String, Object> parameters;
    private List<Object> messages;

Some files were not shown because too many files have changed in this diff